BST 260 Final Project Group: Chuying Ma, Jingjing Tang, Jie Yin, Xuewei Zhang

This Rmd file is for studying the relationship between other variables and mental health:

Mental health patterns in 2000 for all US counties in a map:

library(maps)
library(RColorBrewer)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(readr)

data_2000 = read.csv("data_2000.csv")
ment_2000 <- data_2000 %>% 
  dplyr::select(fips,menthlth)
data(county.fips)

cnty <- map_data("county")

men_map_00 <- cnty %>%
  mutate(polyname = paste(region,subregion,sep=",")) %>%
  left_join(county.fips, by="polyname")

men_df_00 <- inner_join(men_map_00, ment_2000, by=c("fips" = "fips") )

map_00 = ggplot(men_df_00, aes(long, lat,group = group)) + 
  geom_polygon(aes(fill = menthlth)) + coord_quickmap() +
  scale_x_continuous(expand=c(0,0)) +  
  scale_fill_gradientn(colors = brewer.pal(9, "RdPu"), trans = "sqrt")
map_00

mental health patterns in 2001 for all US counties in a map:

data_2001 = read.csv("data_2001.csv")
ment_2001 <- data_2001 %>% dplyr::select(fips,menthlth)
data(county.fips)

cnty <- map_data("county")

men_map_01 <- cnty %>%
  mutate(polyname = paste(region,subregion,sep=",")) %>%
  left_join(county.fips, by="polyname")

men_df_01 <- inner_join(men_map_01, ment_2001, by=c("fips" = "fips") )

map_01 = ggplot(men_df_01, aes(long, lat,group = group)) + 
  geom_polygon(aes(fill = menthlth)) + coord_quickmap() +
  scale_x_continuous(expand=c(0,0)) +  
  scale_fill_gradientn(colors = brewer.pal(9, "RdPu"), trans = "sqrt")
map_01

mental health patterns in 2002 for all US counties in a map:

data_2002 = read.csv("data_2002.csv")
ment_2002 <- data_2002 %>% dplyr::select(fips,menthlth)
data(county.fips)

cnty <- map_data("county")

men_map_02 <- cnty %>%
  mutate(polyname = paste(region,subregion,sep=",")) %>%
  left_join(county.fips, by="polyname")

men_df_02 <- inner_join(men_map_02, ment_2002, by=c("fips" = "fips") )

map_02 = ggplot(men_df_02, aes(long, lat,group = group)) + 
  geom_polygon(aes(fill = menthlth)) + coord_quickmap() +
  scale_x_continuous(expand=c(0,0)) +  
  scale_fill_gradientn(colors = brewer.pal(9, "RdPu"), trans = "sqrt")
map_02

mental health patterns in 2003 for all US counties in a map:

data_2003 = read.csv("data_2003.csv")
ment_2003 <- data_2003 %>% dplyr::select(fips,menthlth)
data(county.fips)

cnty <- map_data("county")

men_map_03 <- cnty %>%
  mutate(polyname = paste(region,subregion,sep=",")) %>%
  left_join(county.fips, by="polyname")

men_df_03 <- inner_join(men_map_03, ment_2003, by=c("fips" = "fips") )

map_03 = ggplot(men_df_03, aes(long, lat,group = group)) + 
  geom_polygon(aes(fill = menthlth)) + coord_quickmap() +
  scale_x_continuous(expand=c(0,0)) +  
  scale_fill_gradientn(colors = brewer.pal(9, "RdPu"), trans = "sqrt")
map_03

mental health patterns in 2004 for all US counties in a map:

data_2004 = read.csv("data_2004.csv")
ment_2004 <- data_2004 %>% dplyr::select(fips,menthlth)
data(county.fips)

cnty <- map_data("county")

men_map_04 <- cnty %>%
  mutate(polyname = paste(region,subregion,sep=",")) %>%
  left_join(county.fips, by="polyname")

men_df_04 <- inner_join(men_map_04, ment_2004, by=c("fips" = "fips") )

map_04 = ggplot(men_df_04, aes(long, lat,group = group)) + 
  geom_polygon(aes(fill = menthlth)) + coord_quickmap() +
  scale_x_continuous(expand=c(0,0)) +  
  scale_fill_gradientn(colors = brewer.pal(9, "RdPu"), trans = "sqrt")
map_04

mental health patterns in 2005 for all US counties in a map:

data_2005 = read.csv("data_2005.csv")
ment_2005 <- data_2005 %>% dplyr::select(fips,menthlth)
data(county.fips)

cnty <- map_data("county")

men_map_05 <- cnty %>%
  mutate(polyname = paste(region,subregion,sep=",")) %>%
  left_join(county.fips, by="polyname")

men_df_05 <- inner_join(men_map_05, ment_2005, by=c("fips" = "fips"))

map_05 = ggplot(men_df_05, aes(long, lat,group = group)) + 
  geom_polygon(aes(fill = menthlth)) + coord_quickmap() +
  scale_x_continuous(expand=c(0,0)) +  
  scale_fill_gradientn(colors = brewer.pal(9, "RdPu"), trans = "sqrt")
map_05

mental health patterns in 2006 for all US counties in a map:

data_2006 = read.csv("data_2006.csv")
ment_2006 <- data_2006 %>% dplyr::select(fips,menthlth)
data(county.fips)

cnty <- map_data("county")

men_map_06 <- cnty %>%
  mutate(polyname = paste(region,subregion,sep=",")) %>%
  left_join(county.fips, by="polyname")

men_df_06 <- inner_join(men_map_06, ment_2006, by=c("fips" = "fips"))

map_06 = ggplot(men_df_06, aes(long, lat,group = group)) + 
  geom_polygon(aes(fill = menthlth)) + coord_quickmap() +
  scale_x_continuous(expand=c(0,0)) +  
  scale_fill_gradientn(colors = brewer.pal(9, "RdPu"), trans = "sqrt")
map_06

mental health patterns in 2007 for all US counties in a map:

data_2007 = read.csv("data_2007.csv")
ment_2007 <- data_2007 %>% dplyr::select(fips,menthlth)
data(county.fips)

cnty <- map_data("county")

men_map_07 <- cnty %>%
  mutate(polyname = paste(region,subregion,sep=",")) %>%
  left_join(county.fips, by="polyname")

men_df_07 <- inner_join(men_map_07, ment_2007, by=c("fips" = "fips"))

map_07 = ggplot(men_df_07, aes(long, lat,group = group)) + 
  geom_polygon(aes(fill = menthlth)) + coord_quickmap() +
  scale_x_continuous(expand=c(0,0)) +  
  scale_fill_gradientn(colors = brewer.pal(9, "RdPu"), trans = "sqrt")
map_07

mental health patterns in 2008 for all US counties in a map:

data_2008 = read.csv("2008_data.csv")
ment_2008 <- data_2008 %>% dplyr::select(fips,menthlth)
data(county.fips)

cnty <- map_data("county")

men_map_08 <- cnty %>%
  mutate(polyname = paste(region,subregion,sep=",")) %>%
  left_join(county.fips, by="polyname")

men_df_08 <- inner_join(men_map_08, ment_2008, by=c("fips" = "fips"))

map_08 = ggplot(men_df_08, aes(long, lat,group = group)) + 
  geom_polygon(aes(fill = menthlth)) + coord_quickmap() +
  scale_x_continuous(expand=c(0,0)) +  
  scale_fill_gradientn(colors = brewer.pal(9, "RdPu"), trans = "sqrt")
map_08

mental health patterns in 2009 for all US counties in a map:

data_2009 = read.csv("2009_data.csv")
ment_2009 <- data_2009 %>% dplyr::select(fips,menthlth)
data(county.fips)

cnty <- map_data("county")

men_map_09 <- cnty %>%
  mutate(polyname = paste(region,subregion,sep=",")) %>%
  left_join(county.fips, by="polyname")

men_df_09 <- inner_join(men_map_09, ment_2009, by=c("fips" = "fips"))

map_09 = ggplot(men_df_09, aes(long, lat,group = group)) + 
  geom_polygon(aes(fill = menthlth)) + coord_quickmap() +
  scale_x_continuous(expand=c(0,0)) +  
  scale_fill_gradientn(colors = brewer.pal(9, "RdPu"), trans = "sqrt")
map_09

mental health patterns in 2010 for all US counties in a map:

data_2010 = read.csv("2010_data.csv")
ment_2010 <- data_2010 %>% dplyr::select(fips,menthlth)
data(county.fips)

cnty <- map_data("county")

men_map_10 <- cnty %>%
  mutate(polyname = paste(region,subregion,sep=",")) %>%
  left_join(county.fips, by="polyname")

men_df_10 <- inner_join(men_map_10, ment_2010, by=c("fips" = "fips"))

map_10 = ggplot(men_df_10, aes(long, lat,group = group)) + 
  geom_polygon(aes(fill = menthlth)) + coord_quickmap() +
  scale_x_continuous(expand=c(0,0)) +  
  scale_fill_gradientn(colors = brewer.pal(9, "RdPu"), trans = "sqrt")
map_10

map in the same plot using facet:

mental_data = data.frame()
mental_data = rbind(data_2000,data_2001,data_2002)
mental_data = rbind(mental_data,data_2003,data_2004)
mental_data = rbind(mental_data,data_2005,data_2006)
mental_data = rbind(mental_data,data_2007,data_2008,data_2009,data_2010)
write_csv(mental_data,"10_year_data.csv")
mental_data = read_csv("10_year_data.csv")
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   fips = col_integer(),
##   state = col_integer(),
##   county = col_integer(),
##   year = col_integer()
## )
## See spec(...) for full column specifications.
ment <- mental_data %>% dplyr::select(fips,menthlth,year)
cnty <- map_data("county")

men_map <- cnty %>%
  mutate(polyname = paste(region,subregion,sep=",")) %>%
  left_join(county.fips, by="polyname")

men_df <- inner_join(men_map, ment, by=c("fips" = "fips"))

map_all_year = ggplot(men_df, aes(long, lat,group = group)) + 
  geom_polygon(aes(fill = menthlth)) + coord_quickmap() +
  scale_x_continuous(expand=c(0,0)) +  
  scale_fill_gradientn(colors = brewer.pal(9, "RdPu"), trans = "sqrt") +
  facet_wrap(~year,ncol = 3) +
  ggtitle("Number of Days Mental Health Not Good in US counties from 2000 to 2010")

map_all_year

write state level data for exposure:

combine with pm2.5 and ozone:

pm2.5 = read_csv("PM25_Ozone_county_2000_2012.csv")
pm2.5$COUNTY = as.numeric(pm2.5$COUNTY)
pm2.5_00 = pm2.5 %>%
  select(COUNTY,PM25_2000,Ozone_2000)
data_2000 = data_2000 %>%
  left_join(pm2.5_00,by = c("fips" = "COUNTY"))
colnames(data_2000)[48] = "pm2.5"
colnames(data_2000)[49] = "ozone"

write_csv(data_2000,"data_00_expo.csv")
pm2.5_01 = pm2.5 %>%
  select(COUNTY,PM25_2001,Ozone_2001)
data_2001 = data_2001 %>%
  left_join(pm2.5_01,by = c("fips" = "COUNTY"))
colnames(data_2001)[48] = "pm2.5"
colnames(data_2001)[49] = "ozone"

write_csv(data_2001,"data_01_expo.csv")
pm2.5_02 = pm2.5 %>%
  select(COUNTY,PM25_2002,Ozone_2002)
data_2002 = data_2002 %>%
  left_join(pm2.5_02,by = c("fips" = "COUNTY"))
colnames(data_2002)[48] = "pm2.5"
colnames(data_2002)[49] = "ozone"

write_csv(data_2002,"data_02_expo.csv")
pm2.5_03 = pm2.5 %>%
  select(COUNTY,PM25_2003,Ozone_2003)
data_2003 = data_2003 %>%
  left_join(pm2.5_03,by = c("fips" = "COUNTY"))
colnames(data_2003)[48] = "pm2.5"
colnames(data_2003)[49] = "ozone"

write_csv(data_2003,"data_03_expo.csv")
pm2.5_04 = pm2.5 %>%
  select(COUNTY,PM25_2004,Ozone_2004)
data_2004 = data_2004 %>%
  left_join(pm2.5_04,by = c("fips" = "COUNTY"))
colnames(data_2004)[48] = "pm2.5"
colnames(data_2004)[49] = "ozone"

write_csv(data_2004,"data_04_expo.csv")
pm2.5_05 = pm2.5 %>%
  select(COUNTY,PM25_2005,Ozone_2005)
data_2005 = data_2005 %>%
  left_join(pm2.5_05,by = c("fips" = "COUNTY"))
colnames(data_2005)[48] = "pm2.5"
colnames(data_2005)[49] = "ozone"

write_csv(data_2005,"data_05_expo.csv")
pm2.5_06 = pm2.5 %>%
  select(COUNTY,PM25_2006,Ozone_2006)
data_2006 = data_2006 %>%
  left_join(pm2.5_06,by = c("fips" = "COUNTY"))
colnames(data_2006)[48] = "pm2.5"
colnames(data_2006)[49] = "ozone"

write_csv(data_2006,"data_06_expo.csv")
pm2.5_07 = pm2.5 %>%
  select(COUNTY,PM25_2007,Ozone_2007)
data_2007 = data_2007 %>%
  left_join(pm2.5_07,by = c("fips" = "COUNTY"))
colnames(data_2007)[48] = "pm2.5"
colnames(data_2007)[49] = "ozone"

write_csv(data_2007,"data_07_expo.csv")
pm2.5_08 = pm2.5 %>%
  select(COUNTY,PM25_2008,Ozone_2008)
data_2008 = data_2008 %>%
  left_join(pm2.5_08,by = c("fips" = "COUNTY"))
colnames(data_2008)[48] = "pm2.5"
colnames(data_2008)[49] = "ozone"

write_csv(data_2008,"data_08_expo.csv")
pm2.5_09 = pm2.5 %>%
  select(COUNTY,PM25_2009,Ozone_2009)
data_2009 = data_2009 %>%
  left_join(pm2.5_09,by = c("fips" = "COUNTY"))
colnames(data_2009)[48] = "pm2.5"
colnames(data_2009)[49] = "ozone"

write_csv(data_2009,"data_09_expo.csv")
pm2.5_10 = pm2.5 %>%
  select(COUNTY,PM25_2010,Ozone_2010)
data_2010 = data_2010 %>%
  left_join(pm2.5_10,by = c("fips" = "COUNTY"))
colnames(data_2010)[48] = "pm2.5"
colnames(data_2010)[49] = "ozone"

write_csv(data_2010,"data_10_expo.csv")

combine all data to write a csv file for future use:

all_expo = rbind(data_2000,data_2001,data_2002,data_2003,data_2004,data_2005,data_2006,data_2007,data_2008,data_2009,data_2010)

write_csv(all_expo,"data_exposure_00_10.csv")

run linear regression for all covariates:

library(readr)
library(dplyr)
data_all = read_csv("data_3_expo.csv")
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   fips = col_integer(),
##   state = col_integer(),
##   county = col_integer()
## )
## See spec(...) for full column specifications.
reg_data = data_all %>%
  dplyr::select(-c(fips,state,county,year,heartattack,ACheartdis,stroke,asthma))

full_men_model = lm(menthlth ~ .,data = reg_data)
summary(full_men_model)
## 
## Call:
## lm(formula = menthlth ~ ., data = reg_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.2722  -1.0666  -0.0411   0.9740  21.1965 
## 
## Coefficients: (6 not defined because of singularities)
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -1.322e+02  3.610e+02  -0.366 0.714149    
## age           2.678e-03  1.944e-02   0.138 0.890415    
## sex           1.182e+00  6.650e-01   1.777 0.075624 .  
## white        -2.932e-02  1.384e-01  -0.212 0.832197    
## black         9.434e-02  1.912e-01   0.493 0.621805    
## asian        -5.513e-01  4.068e-01  -1.355 0.175451    
## race_other           NA         NA      NA       NA    
## educ1         1.030e+00  1.340e+00   0.769 0.442048    
## educ2         1.552e+00  5.739e-01   2.704 0.006884 ** 
## educ3                NA         NA      NA       NA    
## income1      -7.363e+00  1.303e+00  -5.651 1.71e-08 ***
## income2      -1.078e-01  1.194e+00  -0.090 0.928069    
## income3      -2.024e-01  1.115e+00  -0.182 0.855901    
## income4      -2.392e+00  9.302e-01  -2.572 0.010156 *  
## income5      -5.853e-01  8.726e-01  -0.671 0.502461    
## income6      -1.296e+00  8.307e-01  -1.561 0.118713    
## income7      -8.315e-01  8.679e-01  -0.958 0.338087    
## income8              NA         NA      NA       NA    
## married      -1.362e+00  5.790e-01  -2.352 0.018719 *  
## unmarried            NA         NA      NA       NA    
## employed     -3.149e+00  8.035e-01  -3.919 9.04e-05 ***
## out_of_work   5.473e+00  1.451e+00   3.773 0.000164 ***
## homemaker    -2.828e+00  1.183e+00  -2.390 0.016897 *  
## student      -3.301e+00  2.666e+00  -1.238 0.215814    
## employ_other         NA         NA      NA       NA    
## hlthplan     -3.380e+00  8.761e-01  -3.859 0.000116 ***
## exercise     -1.243e+00  6.962e-01  -1.786 0.074257 .  
## smoke         4.265e+00  7.806e-01   5.463 4.98e-08 ***
## drink        -3.131e+00  4.488e-01  -6.977 3.55e-12 ***
## bmi_cat1     -1.759e+00  1.466e+00  -1.200 0.230116    
## bmi_cat2     -1.531e+00  1.087e+00  -1.408 0.159127    
## bmi_cat3             NA         NA      NA       NA    
## bmi_cts      -8.387e-02  9.851e-02  -0.851 0.394641    
## genhlth_ex    1.514e+02  3.610e+02   0.419 0.675007    
## genhlth_vg    1.489e+02  3.610e+02   0.413 0.679974    
## genhlth_go    1.498e+02  3.609e+02   0.415 0.678214    
## genhlth_fa    1.515e+02  3.609e+02   0.420 0.674751    
## genhlth_po    1.627e+02  3.609e+02   0.451 0.652257    
## qlactlm       4.341e+00  7.357e-01   5.901 3.94e-09 ***
## pm2.5        -8.277e-04  1.906e-02  -0.043 0.965356    
## ozone         1.394e+01  7.910e+00   1.763 0.078051 .  
## greenness     1.469e+00  3.530e-01   4.162 3.22e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.192 on 3715 degrees of freedom
##   (13135 observations deleted due to missingness)
## Multiple R-squared:  0.3877, Adjusted R-squared:  0.382 
## F-statistic: 67.22 on 35 and 3715 DF,  p-value: < 2.2e-16

delete income2:

mod1 = lm(menthlth ~ age + sex + white + black + asian + educ1 + educ2 + income1 + income3 + income4 + income5 + income6 + income7 + married + employed + out_of_work + homemaker + student + hlthplan + exercise + smoke + drink + bmi_cat1 + bmi_cat2 + bmi_cts + genhlth_ex + genhlth_vg + genhlth_go + genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness,data = reg_data)
summary(mod1)
## 
## Call:
## lm(formula = menthlth ~ age + sex + white + black + asian + educ1 + 
##     educ2 + income1 + income3 + income4 + income5 + income6 + 
##     income7 + married + employed + out_of_work + homemaker + 
##     student + hlthplan + exercise + smoke + drink + bmi_cat1 + 
##     bmi_cat2 + bmi_cts + genhlth_ex + genhlth_vg + genhlth_go + 
##     genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness, 
##     data = reg_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.2817  -1.0664  -0.0413   0.9675  21.1811 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.340e+02  3.594e+02  -0.373 0.709296    
## age          2.257e-03  1.918e-02   0.118 0.906355    
## sex          1.154e+00  6.607e-01   1.747 0.080694 .  
## white       -2.902e-02  1.376e-01  -0.211 0.832967    
## black        9.504e-02  1.897e-01   0.501 0.616401    
## asian       -5.149e-01  3.950e-01  -1.303 0.192488    
## educ1        9.588e-01  1.313e+00   0.730 0.465443    
## educ2        1.490e+00  5.509e-01   2.705 0.006869 ** 
## income1     -7.343e+00  1.267e+00  -5.794 7.45e-09 ***
## income3     -1.800e-01  1.071e+00  -0.168 0.866523    
## income4     -2.345e+00  8.879e-01  -2.641 0.008306 ** 
## income5     -5.430e-01  8.462e-01  -0.642 0.521096    
## income6     -1.293e+00  8.018e-01  -1.613 0.106793    
## income7     -8.431e-01  8.409e-01  -1.003 0.316118    
## married     -1.348e+00  5.555e-01  -2.427 0.015278 *  
## employed    -3.133e+00  7.870e-01  -3.982 6.97e-05 ***
## out_of_work  5.417e+00  1.440e+00   3.762 0.000171 ***
## homemaker   -2.852e+00  1.173e+00  -2.432 0.015074 *  
## student     -3.242e+00  2.647e+00  -1.225 0.220729    
## hlthplan    -3.399e+00  8.625e-01  -3.941 8.25e-05 ***
## exercise    -1.286e+00  6.911e-01  -1.861 0.062830 .  
## smoke        4.293e+00  7.734e-01   5.550 3.05e-08 ***
## drink       -3.153e+00  4.426e-01  -7.123 1.26e-12 ***
## bmi_cat1    -1.765e+00  1.456e+00  -1.212 0.225573    
## bmi_cat2    -1.520e+00  1.080e+00  -1.407 0.159405    
## bmi_cts     -8.627e-02  9.788e-02  -0.881 0.378169    
## genhlth_ex   1.533e+02  3.593e+02   0.427 0.669732    
## genhlth_vg   1.508e+02  3.593e+02   0.420 0.674759    
## genhlth_go   1.516e+02  3.593e+02   0.422 0.673026    
## genhlth_fa   1.534e+02  3.593e+02   0.427 0.669538    
## genhlth_po   1.646e+02  3.593e+02   0.458 0.647021    
## qlactlm      4.286e+00  7.286e-01   5.882 4.42e-09 ***
## pm2.5        8.248e-04  1.876e-02   0.044 0.964932    
## ozone        1.383e+01  7.797e+00   1.774 0.076140 .  
## greenness    1.491e+00  3.486e-01   4.276 1.95e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.182 on 3758 degrees of freedom
##   (13093 observations deleted due to missingness)
## Multiple R-squared:  0.3902, Adjusted R-squared:  0.3847 
## F-statistic: 70.73 on 34 and 3758 DF,  p-value: < 2.2e-16

delete age:

mod2 = lm(menthlth ~ sex + white + black + asian + educ1 + educ2 + income1 + income3 + income4 + income5 + income6 + income7 + married + employed + out_of_work + homemaker + student + hlthplan + exercise + smoke + drink + bmi_cat1 + bmi_cat2 + bmi_cts + genhlth_ex + genhlth_vg + genhlth_go + genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness,data = reg_data)
summary(mod2)
## 
## Call:
## lm(formula = menthlth ~ sex + white + black + asian + educ1 + 
##     educ2 + income1 + income3 + income4 + income5 + income6 + 
##     income7 + married + employed + out_of_work + homemaker + 
##     student + hlthplan + exercise + smoke + drink + bmi_cat1 + 
##     bmi_cat2 + bmi_cts + genhlth_ex + genhlth_vg + genhlth_go + 
##     genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness, 
##     data = reg_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.2667  -1.0645  -0.0422   0.9689  21.2022 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.352e+02  3.592e+02  -0.377  0.70656    
## sex          1.156e+00  6.604e-01   1.751  0.08007 .  
## white       -2.868e-02  1.376e-01  -0.208  0.83488    
## black        9.442e-02  1.896e-01   0.498  0.61851    
## asian       -5.182e-01  3.940e-01  -1.315  0.18859    
## educ1        9.475e-01  1.310e+00   0.723  0.46945    
## educ2        1.494e+00  5.500e-01   2.716  0.00665 ** 
## income1     -7.354e+00  1.263e+00  -5.821 6.35e-09 ***
## income3     -1.702e-01  1.068e+00  -0.159  0.87336    
## income4     -2.342e+00  8.874e-01  -2.639  0.00836 ** 
## income5     -5.411e-01  8.459e-01  -0.640  0.52242    
## income6     -1.297e+00  8.012e-01  -1.619  0.10557    
## income7     -8.450e-01  8.406e-01  -1.005  0.31486    
## married     -1.347e+00  5.553e-01  -2.425  0.01534 *  
## employed    -3.195e+00  5.852e-01  -5.461 5.05e-08 ***
## out_of_work  5.358e+00  1.349e+00   3.972 7.26e-05 ***
## homemaker   -2.901e+00  1.096e+00  -2.646  0.00818 ** 
## student     -3.364e+00  2.435e+00  -1.382  0.16719    
## hlthplan    -3.385e+00  8.540e-01  -3.964 7.51e-05 ***
## exercise    -1.292e+00  6.889e-01  -1.876  0.06075 .  
## smoke        4.268e+00  7.436e-01   5.739 1.03e-08 ***
## drink       -3.147e+00  4.397e-01  -7.156 9.91e-13 ***
## bmi_cat1    -1.779e+00  1.451e+00  -1.226  0.22014    
## bmi_cat2    -1.528e+00  1.078e+00  -1.417  0.15650    
## bmi_cts     -8.779e-02  9.701e-02  -0.905  0.36556    
## genhlth_ex   1.547e+02  3.591e+02   0.431  0.66657    
## genhlth_vg   1.523e+02  3.591e+02   0.424  0.67157    
## genhlth_go   1.531e+02  3.591e+02   0.426  0.66983    
## genhlth_fa   1.548e+02  3.591e+02   0.431  0.66635    
## genhlth_po   1.660e+02  3.591e+02   0.462  0.64390    
## qlactlm      4.289e+00  7.278e-01   5.893 4.12e-09 ***
## pm2.5        6.537e-04  1.870e-02   0.035  0.97212    
## ozone        1.376e+01  7.770e+00   1.771  0.07671 .  
## greenness    1.489e+00  3.482e-01   4.275 1.96e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.182 on 3759 degrees of freedom
##   (13093 observations deleted due to missingness)
## Multiple R-squared:  0.3902, Adjusted R-squared:  0.3849 
## F-statistic:  72.9 on 33 and 3759 DF,  p-value: < 2.2e-16

delete income3:

mod3 = lm(menthlth ~ sex + white + black + asian + educ1 + educ2 + income1 + income4 + income5 + income6 + income7 + married + employed + out_of_work + homemaker + student + hlthplan + exercise + smoke + drink + bmi_cat1 + bmi_cat2 + bmi_cts + genhlth_ex + genhlth_vg + genhlth_go + genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness,data = reg_data)
summary(mod3)
## 
## Call:
## lm(formula = menthlth ~ sex + white + black + asian + educ1 + 
##     educ2 + income1 + income4 + income5 + income6 + income7 + 
##     married + employed + out_of_work + homemaker + student + 
##     hlthplan + exercise + smoke + drink + bmi_cat1 + bmi_cat2 + 
##     bmi_cts + genhlth_ex + genhlth_vg + genhlth_go + genhlth_fa + 
##     genhlth_po + qlactlm + pm2.5 + ozone + greenness, data = reg_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.2744  -1.0633  -0.0421   0.9680  21.2147 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.356e+02  3.591e+02  -0.378  0.70580    
## sex          1.145e+00  6.566e-01   1.744  0.08126 .  
## white       -2.893e-02  1.375e-01  -0.210  0.83339    
## black        9.551e-02  1.895e-01   0.504  0.61420    
## asian       -5.173e-01  3.939e-01  -1.313  0.18925    
## educ1        9.197e-01  1.298e+00   0.709  0.47861    
## educ2        1.475e+00  5.371e-01   2.746  0.00606 ** 
## income1     -7.308e+00  1.229e+00  -5.945 3.02e-09 ***
## income4     -2.314e+00  8.702e-01  -2.659  0.00787 ** 
## income5     -5.186e-01  8.339e-01  -0.622  0.53407    
## income6     -1.277e+00  7.909e-01  -1.614  0.10656    
## income7     -8.203e-01  8.261e-01  -0.993  0.32078    
## married     -1.334e+00  5.490e-01  -2.429  0.01517 *  
## employed    -3.182e+00  5.795e-01  -5.492 4.24e-08 ***
## out_of_work  5.360e+00  1.349e+00   3.974 7.19e-05 ***
## homemaker   -2.880e+00  1.088e+00  -2.646  0.00818 ** 
## student     -3.372e+00  2.434e+00  -1.386  0.16591    
## hlthplan    -3.359e+00  8.375e-01  -4.010 6.18e-05 ***
## exercise    -1.294e+00  6.887e-01  -1.880  0.06023 .  
## smoke        4.264e+00  7.431e-01   5.737 1.04e-08 ***
## drink       -3.139e+00  4.368e-01  -7.186 7.99e-13 ***
## bmi_cat1    -1.791e+00  1.449e+00  -1.236  0.21650    
## bmi_cat2    -1.538e+00  1.076e+00  -1.430  0.15288    
## bmi_cts     -8.859e-02  9.687e-02  -0.915  0.36049    
## genhlth_ex   1.550e+02  3.590e+02   0.432  0.66587    
## genhlth_vg   1.526e+02  3.590e+02   0.425  0.67088    
## genhlth_go   1.534e+02  3.590e+02   0.427  0.66915    
## genhlth_fa   1.551e+02  3.590e+02   0.432  0.66567    
## genhlth_po   1.663e+02  3.590e+02   0.463  0.64322    
## qlactlm      4.283e+00  7.265e-01   5.895 4.08e-09 ***
## pm2.5        8.487e-04  1.866e-02   0.045  0.96372    
## ozone        1.379e+01  7.767e+00   1.775  0.07596 .  
## greenness    1.492e+00  3.477e-01   4.291 1.82e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.182 on 3760 degrees of freedom
##   (13093 observations deleted due to missingness)
## Multiple R-squared:  0.3902, Adjusted R-squared:  0.385 
## F-statistic: 75.19 on 32 and 3760 DF,  p-value: < 2.2e-16

delete white:

mod4 = lm(menthlth ~ sex + black + asian + educ1 + educ2 + income1 + income4 + income5 + income6 + income7 + married + employed + out_of_work + homemaker + student + hlthplan + exercise + smoke + drink + bmi_cat1 + bmi_cat2 + bmi_cts + genhlth_ex + genhlth_vg + genhlth_go + genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness,data = reg_data)
summary(mod4)
## 
## Call:
## lm(formula = menthlth ~ sex + black + asian + educ1 + educ2 + 
##     income1 + income4 + income5 + income6 + income7 + married + 
##     employed + out_of_work + homemaker + student + hlthplan + 
##     exercise + smoke + drink + bmi_cat1 + bmi_cat2 + bmi_cts + 
##     genhlth_ex + genhlth_vg + genhlth_go + genhlth_fa + genhlth_po + 
##     qlactlm + pm2.5 + ozone + greenness, data = reg_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.3060  -1.0587  -0.0427   0.9594  21.1859 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.369e+02  3.590e+02  -0.381  0.70293    
## sex          1.151e+00  6.557e-01   1.755  0.07928 .  
## black        1.351e-01  1.502e-01   0.899  0.36860    
## asian       -3.681e-01  3.662e-01  -1.005  0.31483    
## educ1        9.121e-01  1.297e+00   0.703  0.48189    
## educ2        1.495e+00  5.358e-01   2.791  0.00528 ** 
## income1     -7.303e+00  1.228e+00  -5.947 2.99e-09 ***
## income4     -2.279e+00  8.692e-01  -2.622  0.00877 ** 
## income5     -5.108e-01  8.319e-01  -0.614  0.53927    
## income6     -1.271e+00  7.896e-01  -1.610  0.10747    
## income7     -8.344e-01  8.250e-01  -1.011  0.31190    
## married     -1.285e+00  5.474e-01  -2.348  0.01893 *  
## employed    -3.170e+00  5.788e-01  -5.476 4.64e-08 ***
## out_of_work  5.329e+00  1.347e+00   3.957 7.73e-05 ***
## homemaker   -2.845e+00  1.088e+00  -2.616  0.00893 ** 
## student     -3.195e+00  2.429e+00  -1.315  0.18844    
## hlthplan    -3.434e+00  8.366e-01  -4.105 4.12e-05 ***
## exercise    -1.324e+00  6.877e-01  -1.926  0.05423 .  
## smoke        4.271e+00  7.418e-01   5.757 9.25e-09 ***
## drink       -3.123e+00  4.362e-01  -7.159 9.73e-13 ***
## bmi_cat1    -1.733e+00  1.447e+00  -1.198  0.23107    
## bmi_cat2    -1.517e+00  1.075e+00  -1.411  0.15830    
## bmi_cts     -8.471e-02  9.681e-02  -0.875  0.38165    
## genhlth_ex   1.563e+02  3.589e+02   0.436  0.66322    
## genhlth_vg   1.538e+02  3.589e+02   0.429  0.66830    
## genhlth_go   1.546e+02  3.589e+02   0.431  0.66662    
## genhlth_fa   1.564e+02  3.589e+02   0.436  0.66305    
## genhlth_po   1.676e+02  3.589e+02   0.467  0.64051    
## qlactlm      4.226e+00  7.248e-01   5.830 6.01e-09 ***
## pm2.5        4.473e-04  1.862e-02   0.024  0.98083    
## ozone        1.365e+01  7.735e+00   1.764  0.07775 .  
## greenness    1.505e+00  3.462e-01   4.349 1.41e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.181 on 3773 degrees of freedom
##   (13081 observations deleted due to missingness)
## Multiple R-squared:   0.39,  Adjusted R-squared:  0.385 
## F-statistic: 77.81 on 31 and 3773 DF,  p-value: < 2.2e-16

delete genhlth_vg:

mod5 = lm(menthlth ~ sex + black + asian + educ1 + educ2 + income1 + income4 + income5 + income6 + income7 + married + employed + out_of_work + homemaker + student + hlthplan + exercise + smoke + drink + bmi_cat1 + bmi_cat2 + bmi_cts + genhlth_ex + genhlth_go + genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness,data = reg_data)
summary(mod5)
## 
## Call:
## lm(formula = menthlth ~ sex + black + asian + educ1 + educ2 + 
##     income1 + income4 + income5 + income6 + income7 + married + 
##     employed + out_of_work + homemaker + student + hlthplan + 
##     exercise + smoke + drink + bmi_cat1 + bmi_cat2 + bmi_cts + 
##     genhlth_ex + genhlth_go + genhlth_fa + genhlth_po + qlactlm + 
##     pm2.5 + ozone + greenness, data = reg_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.3048  -1.0580  -0.0423   0.9593  21.1851 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 16.9059315  3.8083300   4.439 9.29e-06 ***
## sex          1.1545614  0.6555309   1.761  0.07828 .  
## black        0.1350305  0.1501968   0.899  0.36870    
## asian       -0.3668663  0.3661573  -1.002  0.31644    
## educ1        0.9039184  1.2965802   0.697  0.48575    
## educ2        1.4915840  0.5356231   2.785  0.00538 ** 
## income1     -7.3075294  1.2279432  -5.951 2.91e-09 ***
## income4     -2.2781783  0.8691468  -2.621  0.00880 ** 
## income5     -0.5170001  0.8316940  -0.622  0.53423    
## income6     -1.2732501  0.7895452  -1.613  0.10691    
## income7     -0.8456256  0.8245048  -1.026  0.30514    
## married     -1.2867348  0.5473429  -2.351  0.01878 *  
## employed    -3.1773746  0.5784989  -5.492 4.23e-08 ***
## out_of_work  5.3258498  1.3465454   3.955 7.79e-05 ***
## homemaker   -2.8495650  1.0874395  -2.620  0.00882 ** 
## student     -3.2042958  2.4285592  -1.319  0.18711    
## hlthplan    -3.4410065  0.8363564  -4.114 3.97e-05 ***
## exercise    -1.3249926  0.6876680  -1.927  0.05408 .  
## smoke        4.2618061  0.7414596   5.748 9.75e-09 ***
## drink       -3.1209840  0.4361487  -7.156 9.95e-13 ***
## bmi_cat1    -1.7450688  1.4464325  -1.206  0.22771    
## bmi_cat2    -1.5219590  1.0751466  -1.416  0.15698    
## bmi_cts     -0.0850834  0.0967951  -0.879  0.37945    
## genhlth_ex   2.5089983  0.9950005   2.522  0.01172 *  
## genhlth_go   0.8268782  0.8226417   1.005  0.31489    
## genhlth_fa   2.5939973  1.0807838   2.400  0.01644 *  
## genhlth_po  13.8189923  1.4273400   9.682  < 2e-16 ***
## qlactlm      4.2227559  0.7247162   5.827 6.12e-09 ***
## pm2.5        0.0003147  0.0186119   0.017  0.98651    
## ozone       13.7257738  7.7322703   1.775  0.07596 .  
## greenness    1.5066414  0.3461142   4.353 1.38e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.181 on 3774 degrees of freedom
##   (13081 observations deleted due to missingness)
## Multiple R-squared:   0.39,  Adjusted R-squared:  0.3851 
## F-statistic: 80.42 on 30 and 3774 DF,  p-value: < 2.2e-16

delete income5:

mod6 = lm(menthlth ~ sex + black + asian + educ1 + educ2 + income1 + income4 + income6 + income7 + married + employed + out_of_work + homemaker + student + hlthplan + exercise + smoke + drink + bmi_cat1 + bmi_cat2 + bmi_cts + genhlth_ex + genhlth_go + genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness,data = reg_data)
summary(mod6)
## 
## Call:
## lm(formula = menthlth ~ sex + black + asian + educ1 + educ2 + 
##     income1 + income4 + income6 + income7 + married + employed + 
##     out_of_work + homemaker + student + hlthplan + exercise + 
##     smoke + drink + bmi_cat1 + bmi_cat2 + bmi_cts + genhlth_ex + 
##     genhlth_go + genhlth_fa + genhlth_po + qlactlm + pm2.5 + 
##     ozone + greenness, data = reg_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.2400  -1.0615  -0.0447   0.9645  21.1293 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 16.7970457  3.8039904   4.416 1.04e-05 ***
## sex          1.1666259  0.6551902   1.781  0.07506 .  
## black        0.1392310  0.1500326   0.928  0.35346    
## asian       -0.3613184  0.3660188  -0.987  0.32363    
## educ1        0.9163116  1.2963215   0.707  0.47970    
## educ2        1.4154634  0.5213950   2.715  0.00666 ** 
## income1     -7.1717625  1.2082655  -5.936 3.19e-09 ***
## income4     -2.2455138  0.8674864  -2.589  0.00968 ** 
## income6     -1.2078350  0.7824373  -1.544  0.12275    
## income7     -0.7670658  0.8146955  -0.942  0.34649    
## married     -1.2712945  0.5467346  -2.325  0.02011 *  
## employed    -3.1449712  0.5760988  -5.459 5.09e-08 ***
## out_of_work  5.3854958  1.3430132   4.010 6.19e-05 ***
## homemaker   -2.8026699  1.0847314  -2.584  0.00981 ** 
## student     -3.2040596  2.4283617  -1.319  0.18710    
## hlthplan    -3.3971077  0.8333019  -4.077 4.66e-05 ***
## exercise    -1.3410662  0.6871259  -1.952  0.05105 .  
## smoke        4.2415793  0.7406851   5.727 1.10e-08 ***
## drink       -3.1214711  0.4361125  -7.157 9.83e-13 ***
## bmi_cat1    -1.7463674  1.4463134  -1.207  0.22733    
## bmi_cat2    -1.5368974  1.0747906  -1.430  0.15281    
## bmi_cts     -0.0868244  0.0967467  -0.897  0.36954    
## genhlth_ex   2.5422568  0.9934802   2.559  0.01054 *  
## genhlth_go   0.8205911  0.8225127   0.998  0.31851    
## genhlth_fa   2.6165495  1.0800869   2.423  0.01546 *  
## genhlth_po  13.8579888  1.4258449   9.719  < 2e-16 ***
## qlactlm      4.2151283  0.7245534   5.818 6.47e-09 ***
## pm2.5        0.0006723  0.0186015   0.036  0.97117    
## ozone       13.9745241  7.7212811   1.810  0.07040 .  
## greenness    1.5185102  0.3455590   4.394 1.14e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.18 on 3775 degrees of freedom
##   (13081 observations deleted due to missingness)
## Multiple R-squared:  0.3899, Adjusted R-squared:  0.3852 
## F-statistic: 83.19 on 29 and 3775 DF,  p-value: < 2.2e-16

delete educ1:

mod7 = lm(menthlth ~ sex + black + asian + educ2 + income1 + income4 + income6 + income7 + married + employed + out_of_work + homemaker + student + hlthplan + exercise + smoke + drink + bmi_cat1 + bmi_cat2 + bmi_cts + genhlth_ex + genhlth_go + genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness,data = reg_data)
summary(mod7)
## 
## Call:
## lm(formula = menthlth ~ sex + black + asian + educ2 + income1 + 
##     income4 + income6 + income7 + married + employed + out_of_work + 
##     homemaker + student + hlthplan + exercise + smoke + drink + 
##     bmi_cat1 + bmi_cat2 + bmi_cts + genhlth_ex + genhlth_go + 
##     genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness, 
##     data = reg_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.1891  -1.0537  -0.0430   0.9667  21.1571 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 16.931884   3.792287   4.465 8.25e-06 ***
## sex          1.132782   0.653317   1.734  0.08302 .  
## black        0.136321   0.149746   0.910  0.36270    
## asian       -0.362910   0.365669  -0.992  0.32104    
## educ2        1.350864   0.509700   2.650  0.00808 ** 
## income1     -6.978244   1.174115  -5.943 3.04e-09 ***
## income4     -2.237425   0.865841  -2.584  0.00980 ** 
## income6     -1.204154   0.781912  -1.540  0.12364    
## income7     -0.827323   0.811953  -1.019  0.30830    
## married     -1.240633   0.545939  -2.272  0.02311 *  
## employed    -3.129805   0.575115  -5.442 5.60e-08 ***
## out_of_work  5.397968   1.341426   4.024 5.83e-05 ***
## homemaker   -2.740568   1.080720  -2.536  0.01126 *  
## student     -3.370241   2.422361  -1.391  0.16421    
## hlthplan    -3.462430   0.825121  -4.196 2.78e-05 ***
## exercise    -1.382524   0.684225  -2.021  0.04339 *  
## smoke        4.194612   0.736235   5.697 1.31e-08 ***
## drink       -3.145698   0.434747  -7.236 5.58e-13 ***
## bmi_cat1    -1.737268   1.445301  -1.202  0.22943    
## bmi_cat2    -1.518598   1.074117  -1.414  0.15750    
## bmi_cts     -0.087568   0.096671  -0.906  0.36508    
## genhlth_ex   2.520727   0.992915   2.539  0.01117 *  
## genhlth_go   0.851434   0.819769   1.039  0.29904    
## genhlth_fa   2.781992   1.057813   2.630  0.00857 ** 
## genhlth_po  14.073221   1.390628  10.120  < 2e-16 ***
## qlactlm      4.152048   0.719144   5.774 8.38e-09 ***
## pm2.5        0.002109   0.018546   0.114  0.90948    
## ozone       14.018889   7.712750   1.818  0.06920 .  
## greenness    1.494825   0.344899   4.334 1.50e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.18 on 3779 degrees of freedom
##   (13078 observations deleted due to missingness)
## Multiple R-squared:  0.3902, Adjusted R-squared:  0.3856 
## F-statistic: 86.35 on 28 and 3779 DF,  p-value: < 2.2e-16

delete bmi_cts:

mod8 = lm(menthlth ~ sex + black + asian + educ2 + income1 + income4 + income6 + income7 + married + employed + out_of_work + homemaker + student + hlthplan + exercise + smoke + drink + bmi_cat1 + bmi_cat2 + genhlth_ex + genhlth_go + genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness,data = reg_data)
summary(mod8)
## 
## Call:
## lm(formula = menthlth ~ sex + black + asian + educ2 + income1 + 
##     income4 + income6 + income7 + married + employed + out_of_work + 
##     homemaker + student + hlthplan + exercise + smoke + drink + 
##     bmi_cat1 + bmi_cat2 + genhlth_ex + genhlth_go + genhlth_fa + 
##     genhlth_po + qlactlm + pm2.5 + ozone + greenness, data = reg_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.2559  -1.0592  -0.0402   0.9714  21.1316 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 13.809691   1.581582   8.732  < 2e-16 ***
## sex          1.108766   0.652763   1.699  0.08948 .  
## black        0.131590   0.149651   0.879  0.37929    
## asian       -0.355522   0.365569  -0.973  0.33086    
## educ2        1.338162   0.509495   2.626  0.00866 ** 
## income1     -6.991240   1.173999  -5.955 2.84e-09 ***
## income4     -2.229043   0.865771  -2.575  0.01007 *  
## income6     -1.190139   0.781740  -1.522  0.12799    
## income7     -0.804173   0.811532  -0.991  0.32178    
## married     -1.231360   0.545830  -2.256  0.02413 *  
## employed    -3.152500   0.574555  -5.487 4.36e-08 ***
## out_of_work  5.337033   1.339706   3.984 6.91e-05 ***
## homemaker   -2.722302   1.080507  -2.519  0.01179 *  
## student     -3.255842   2.419009  -1.346  0.17840    
## hlthplan    -3.432160   0.824425  -4.163 3.21e-05 ***
## exercise    -1.336410   0.682312  -1.959  0.05023 .  
## smoke        4.230261   0.735165   5.754 9.40e-09 ***
## drink       -3.142121   0.434718  -7.228 5.90e-13 ***
## bmi_cat1    -0.596659   0.709483  -0.841  0.40041    
## bmi_cat2    -0.825977   0.754364  -1.095  0.27362    
## genhlth_ex   2.567698   0.991537   2.590  0.00965 ** 
## genhlth_go   0.852631   0.819748   1.040  0.29835    
## genhlth_fa   2.769813   1.057703   2.619  0.00886 ** 
## genhlth_po  14.080355   1.390573  10.126  < 2e-16 ***
## qlactlm      4.097882   0.716637   5.718 1.16e-08 ***
## pm2.5        0.001368   0.018527   0.074  0.94116    
## ozone       14.239468   7.708722   1.847  0.06480 .  
## greenness    1.490424   0.344856   4.322 1.59e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.18 on 3780 degrees of freedom
##   (13078 observations deleted due to missingness)
## Multiple R-squared:   0.39,  Adjusted R-squared:  0.3857 
## F-statistic: 89.52 on 27 and 3780 DF,  p-value: < 2.2e-16

delete bmi_cat1:

mod9 =  lm(menthlth ~ sex + black + asian + educ2 + income1 + income4 + income6 + income7 + married + employed + out_of_work + homemaker + student + hlthplan + exercise + smoke + drink + bmi_cat2 + genhlth_ex + genhlth_go + genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness,data = reg_data)
summary(mod9)
## 
## Call:
## lm(formula = menthlth ~ sex + black + asian + educ2 + income1 + 
##     income4 + income6 + income7 + married + employed + out_of_work + 
##     homemaker + student + hlthplan + exercise + smoke + drink + 
##     bmi_cat2 + genhlth_ex + genhlth_go + genhlth_fa + genhlth_po + 
##     qlactlm + pm2.5 + ozone + greenness, data = reg_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.2694  -1.0571  -0.0408   0.9764  21.0710 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 13.455543   1.524429   8.827  < 2e-16 ***
## sex          1.069038   0.651026   1.642  0.10066    
## black        0.140336   0.149283   0.940  0.34725    
## asian       -0.370013   0.365148  -1.013  0.31097    
## educ2        1.414777   0.501264   2.822  0.00479 ** 
## income1     -6.946582   1.172752  -5.923 3.44e-09 ***
## income4     -2.190794   0.864542  -2.534  0.01132 *  
## income6     -1.144794   0.779848  -1.468  0.14220    
## income7     -0.777008   0.810857  -0.958  0.33800    
## married     -1.199491   0.544492  -2.203  0.02766 *  
## employed    -3.107439   0.572029  -5.432 5.91e-08 ***
## out_of_work  5.386927   1.338340   4.025 5.81e-05 ***
## homemaker   -2.718863   1.080457  -2.516  0.01190 *  
## student     -3.252090   2.418911  -1.344  0.17889    
## hlthplan    -3.417345   0.824205  -4.146 3.45e-05 ***
## exercise    -1.400126   0.678067  -2.065  0.03900 *  
## smoke        4.150608   0.729009   5.693 1.34e-08 ***
## drink       -3.164315   0.433900  -7.293 3.68e-13 ***
## bmi_cat2    -0.524877   0.663958  -0.791  0.42927    
## genhlth_ex   2.428255   0.977537   2.484  0.01303 *  
## genhlth_go   0.934257   0.813950   1.148  0.25112    
## genhlth_fa   2.867366   1.051282   2.727  0.00641 ** 
## genhlth_po  14.126455   1.389438  10.167  < 2e-16 ***
## qlactlm      4.128102   0.715708   5.768 8.67e-09 ***
## pm2.5        0.002069   0.018508   0.112  0.91100    
## ozone       13.982930   7.702386   1.815  0.06954 .  
## greenness    1.502176   0.344560   4.360 1.34e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.18 on 3781 degrees of freedom
##   (13078 observations deleted due to missingness)
## Multiple R-squared:  0.3899, Adjusted R-squared:  0.3857 
## F-statistic: 92.94 on 26 and 3781 DF,  p-value: < 2.2e-16

delete bmi_cat2:

mod10 = lm(menthlth ~ sex + black + asian + educ2 + income1 + income4 + income6 + income7 + married + employed + out_of_work + homemaker + student + hlthplan + exercise + smoke + drink + genhlth_ex + genhlth_go + genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness,data = reg_data)
summary(mod10)
## 
## Call:
## lm(formula = menthlth ~ sex + black + asian + educ2 + income1 + 
##     income4 + income6 + income7 + married + employed + out_of_work + 
##     homemaker + student + hlthplan + exercise + smoke + drink + 
##     genhlth_ex + genhlth_go + genhlth_fa + genhlth_po + qlactlm + 
##     pm2.5 + ozone + greenness, data = reg_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.2751  -1.0612  -0.0432   0.9703  20.9489 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 13.233353   1.498220   8.833  < 2e-16 ***
## sex          1.106615   0.649257   1.704  0.08838 .  
## black        0.136771   0.149208   0.917  0.35939    
## asian       -0.370158   0.365130  -1.014  0.31076    
## educ2        1.387587   0.500058   2.775  0.00555 ** 
## income1     -6.952568   1.172670  -5.929 3.32e-09 ***
## income4     -2.171941   0.864170  -2.513  0.01200 *  
## income6     -1.159868   0.779577  -1.488  0.13688    
## income7     -0.808770   0.809821  -0.999  0.31800    
## married     -1.232731   0.542839  -2.271  0.02321 *  
## employed    -3.080563   0.570990  -5.395 7.27e-08 ***
## out_of_work  5.427554   1.337287   4.059 5.04e-05 ***
## homemaker   -2.704300   1.080246  -2.503  0.01234 *  
## student     -3.178348   2.416992  -1.315  0.18859    
## hlthplan    -3.393198   0.823598  -4.120 3.87e-05 ***
## exercise    -1.416326   0.677723  -2.090  0.03670 *  
## smoke        4.147400   0.728962   5.689 1.37e-08 ***
## drink       -3.178133   0.433526  -7.331 2.78e-13 ***
## genhlth_ex   2.461532   0.976582   2.521  0.01176 *  
## genhlth_go   0.955399   0.813471   1.174  0.24028    
## genhlth_fa   2.891422   1.050789   2.752  0.00596 ** 
## genhlth_po  14.153748   1.388940  10.190  < 2e-16 ***
## qlactlm      4.154299   0.714905   5.811 6.72e-09 ***
## pm2.5        0.002191   0.018506   0.118  0.90575    
## ozone       13.926352   7.701671   1.808  0.07065 .  
## greenness    1.500980   0.344539   4.356 1.36e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.18 on 3782 degrees of freedom
##   (13078 observations deleted due to missingness)
## Multiple R-squared:  0.3898, Adjusted R-squared:  0.3858 
## F-statistic: 96.65 on 25 and 3782 DF,  p-value: < 2.2e-16

delete black:

mod11 = lm(menthlth ~ sex + asian + educ2 + income1 + income4 + income6 + income7 + married + employed + out_of_work + homemaker + student + hlthplan + exercise + smoke + drink + genhlth_ex + genhlth_go + genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness,data = reg_data)
summary(mod11)
## 
## Call:
## lm(formula = menthlth ~ sex + asian + educ2 + income1 + income4 + 
##     income6 + income7 + married + employed + out_of_work + homemaker + 
##     student + hlthplan + exercise + smoke + drink + genhlth_ex + 
##     genhlth_go + genhlth_fa + genhlth_po + qlactlm + pm2.5 + 
##     ozone + greenness, data = reg_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.2626  -1.0407  -0.0406   0.9574  21.0823 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 13.1316475  1.4555314   9.022  < 2e-16 ***
## sex          1.3204685  0.6324717   2.088  0.03688 *  
## asian       -0.1872403  0.2658969  -0.704  0.48136    
## educ2        1.3279972  0.4858312   2.733  0.00630 ** 
## income1     -6.9002780  1.1488503  -6.006 2.07e-09 ***
## income4     -2.2420937  0.8440959  -2.656  0.00793 ** 
## income6     -1.0846417  0.7564290  -1.434  0.15168    
## income7     -0.7967446  0.7859403  -1.014  0.31077    
## married     -1.2526395  0.5193240  -2.412  0.01591 *  
## employed    -3.1059396  0.5549723  -5.597 2.33e-08 ***
## out_of_work  5.3144316  1.3061909   4.069 4.82e-05 ***
## homemaker   -2.8937121  1.0514939  -2.752  0.00595 ** 
## student     -3.7268272  2.2898653  -1.628  0.10370    
## hlthplan    -3.4181576  0.8010686  -4.267 2.03e-05 ***
## exercise    -1.3927426  0.6581059  -2.116  0.03438 *  
## smoke        4.3199347  0.7089008   6.094 1.21e-09 ***
## drink       -3.2331399  0.4193523  -7.710 1.58e-14 ***
## genhlth_ex   2.4471046  0.9505498   2.574  0.01008 *  
## genhlth_go   1.0592587  0.7930507   1.336  0.18173    
## genhlth_fa   2.9364303  1.0255170   2.863  0.00421 ** 
## genhlth_po  14.1865856  1.3531702  10.484  < 2e-16 ***
## qlactlm      3.9966302  0.6900912   5.791 7.52e-09 ***
## pm2.5       -0.0008375  0.0176943  -0.047  0.96225    
## ozone       15.2872421  7.3472445   2.081  0.03753 *  
## greenness    1.5439223  0.3303762   4.673 3.06e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.151 on 3950 degrees of freedom
##   (12911 observations deleted due to missingness)
## Multiple R-squared:  0.3987, Adjusted R-squared:  0.395 
## F-statistic: 109.1 on 24 and 3950 DF,  p-value: < 2.2e-16

delete asian:

mod12 = lm(menthlth ~ sex + educ2 + income1 + income4 + income6 + income7 + married + employed + out_of_work + homemaker + student + hlthplan + exercise + smoke + drink + genhlth_ex + genhlth_go + genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness,data = reg_data)
summary(mod12)
## 
## Call:
## lm(formula = menthlth ~ sex + educ2 + income1 + income4 + income6 + 
##     income7 + married + employed + out_of_work + homemaker + 
##     student + hlthplan + exercise + smoke + drink + genhlth_ex + 
##     genhlth_go + genhlth_fa + genhlth_po + qlactlm + pm2.5 + 
##     ozone + greenness, data = reg_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -19.6846  -1.2947  -0.0493   1.1526  22.6382 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 10.255664   0.916346  11.192  < 2e-16 ***
## sex         -0.061897   0.406604  -0.152 0.879010    
## educ2        0.571854   0.311227   1.837 0.066174 .  
## income1     -0.750505   0.713574  -1.052 0.292934    
## income4      0.332985   0.551755   0.604 0.546187    
## income6      0.400030   0.476289   0.840 0.400988    
## income7     -0.874572   0.489511  -1.787 0.074025 .  
## married     -0.122057   0.344987  -0.354 0.723493    
## employed    -1.831612   0.367889  -4.979 6.49e-07 ***
## out_of_work  4.015560   0.887239   4.526 6.07e-06 ***
## homemaker   -1.301440   0.681900  -1.909 0.056345 .  
## student     -7.871992   1.356813  -5.802 6.73e-09 ***
## hlthplan    -1.802891   0.517532  -3.484 0.000497 ***
## exercise    -2.011671   0.408545  -4.924 8.60e-07 ***
## smoke        6.260483   0.438637  14.273  < 2e-16 ***
## drink       -2.939469   0.274782 -10.697  < 2e-16 ***
## genhlth_ex   0.993972   0.587334   1.692 0.090608 .  
## genhlth_go  -0.012063   0.483617  -0.025 0.980101    
## genhlth_fa   2.789457   0.631444   4.418 1.01e-05 ***
## genhlth_po  12.451268   0.836273  14.889  < 2e-16 ***
## qlactlm      2.479303   0.446270   5.556 2.83e-08 ***
## pm2.5        0.004621   0.013444   0.344 0.731040    
## ozone       32.239021   5.646010   5.710 1.16e-08 ***
## greenness    2.889014   0.261178  11.061  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.768 on 11367 degrees of freedom
##   (5495 observations deleted due to missingness)
## Multiple R-squared:  0.302,  Adjusted R-squared:  0.3006 
## F-statistic: 213.9 on 23 and 11367 DF,  p-value: < 2.2e-16

delete sex:

mod13 = lm(menthlth ~ educ2 + income1 + income4 + income6 + income7 + married + employed + out_of_work + homemaker + student + hlthplan + exercise + smoke + drink + genhlth_ex + genhlth_go + genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness,data = reg_data)
summary(mod13)
## 
## Call:
## lm(formula = menthlth ~ educ2 + income1 + income4 + income6 + 
##     income7 + married + employed + out_of_work + homemaker + 
##     student + hlthplan + exercise + smoke + drink + genhlth_ex + 
##     genhlth_go + genhlth_fa + genhlth_po + qlactlm + pm2.5 + 
##     ozone + greenness, data = reg_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -19.6824  -1.2946  -0.0497   1.1532  22.6351 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 10.206697   0.857994  11.896  < 2e-16 ***
## educ2        0.573939   0.310912   1.846 0.064921 .  
## income1     -0.750924   0.713538  -1.052 0.292641    
## income4      0.334825   0.551598   0.607 0.543857    
## income6      0.402370   0.476021   0.845 0.397973    
## income7     -0.872864   0.489361  -1.784 0.074502 .  
## married     -0.112655   0.339399  -0.332 0.739951    
## employed    -1.830517   0.367803  -4.977 6.56e-07 ***
## out_of_work  4.016963   0.887153   4.528 6.02e-06 ***
## homemaker   -1.322031   0.668321  -1.978 0.047937 *  
## student     -7.871466   1.356751  -5.802 6.74e-09 ***
## hlthplan    -1.805689   0.517184  -3.491 0.000482 ***
## exercise    -2.007987   0.407810  -4.924 8.61e-07 ***
## smoke        6.262179   0.438477  14.282  < 2e-16 ***
## drink       -2.932186   0.270573 -10.837  < 2e-16 ***
## genhlth_ex   0.992737   0.587253   1.690 0.090964 .  
## genhlth_go  -0.012455   0.483589  -0.026 0.979453    
## genhlth_fa   2.791423   0.631285   4.422 9.88e-06 ***
## genhlth_po  12.454114   0.836028  14.897  < 2e-16 ***
## qlactlm      2.480392   0.446194   5.559 2.77e-08 ***
## pm2.5        0.004536   0.013431   0.338 0.735610    
## ozone       32.281336   5.638920   5.725 1.06e-08 ***
## greenness    2.887478   0.260972  11.064  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.768 on 11368 degrees of freedom
##   (5495 observations deleted due to missingness)
## Multiple R-squared:  0.302,  Adjusted R-squared:  0.3007 
## F-statistic: 223.6 on 22 and 11368 DF,  p-value: < 2.2e-16

delete genhlth_go:

mod14 = lm(menthlth ~ educ2 + income1 + income4 + income6 + income7 + married + employed + out_of_work + homemaker + student + hlthplan + exercise + smoke + drink + genhlth_ex + genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness,data = reg_data)
summary(mod14)
## 
## Call:
## lm(formula = menthlth ~ educ2 + income1 + income4 + income6 + 
##     income7 + married + employed + out_of_work + homemaker + 
##     student + hlthplan + exercise + smoke + drink + genhlth_ex + 
##     genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness, 
##     data = reg_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -19.680  -1.294  -0.050   1.153  22.634 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 10.197105   0.772912  13.193  < 2e-16 ***
## educ2        0.573085   0.309129   1.854  0.06378 .  
## income1     -0.751498   0.713158  -1.054  0.29201    
## income4      0.334921   0.551562   0.607  0.54372    
## income6      0.402592   0.475922   0.846  0.39761    
## income7     -0.872368   0.488961  -1.784  0.07443 .  
## married     -0.112239   0.338998  -0.331  0.74058    
## employed    -1.829920   0.367057  -4.985 6.27e-07 ***
## out_of_work  4.017215   0.887060   4.529 6.00e-06 ***
## homemaker   -1.322044   0.668292  -1.978  0.04793 *  
## student     -7.871583   1.356683  -5.802 6.72e-09 ***
## hlthplan    -1.804020   0.513087  -3.516  0.00044 ***
## exercise    -2.007095   0.406319  -4.940 7.94e-07 ***
## smoke        6.261979   0.438389  14.284  < 2e-16 ***
## drink       -2.931424   0.268940 -10.900  < 2e-16 ***
## genhlth_ex   0.999657   0.522162   1.914  0.05559 .  
## genhlth_fa   2.798601   0.566406   4.941 7.88e-07 ***
## genhlth_po  12.461662   0.782942  15.916  < 2e-16 ***
## qlactlm      2.479634   0.445202   5.570 2.61e-08 ***
## pm2.5        0.004525   0.013425   0.337  0.73607    
## ozone       32.290143   5.628296   5.737 9.88e-09 ***
## greenness    2.887963   0.260282  11.096  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.768 on 11369 degrees of freedom
##   (5495 observations deleted due to missingness)
## Multiple R-squared:  0.302,  Adjusted R-squared:  0.3007 
## F-statistic: 234.3 on 21 and 11369 DF,  p-value: < 2.2e-16

delete married:

mod15 = lm(menthlth ~ educ2 + income1 + income4 + income6 + income7 + employed + out_of_work + homemaker + student + hlthplan + exercise + smoke + drink + genhlth_ex + genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness,data = reg_data)
summary(mod15)
## 
## Call:
## lm(formula = menthlth ~ educ2 + income1 + income4 + income6 + 
##     income7 + employed + out_of_work + homemaker + student + 
##     hlthplan + exercise + smoke + drink + genhlth_ex + genhlth_fa + 
##     genhlth_po + qlactlm + pm2.5 + ozone + greenness, data = reg_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -19.6631  -1.2933  -0.0495   1.1533  22.6260 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 10.150332   0.759862  13.358  < 2e-16 ***
## educ2        0.567198   0.308605   1.838 0.066097 .  
## income1     -0.702147   0.697380  -1.007 0.314035    
## income4      0.346595   0.550412   0.630 0.528902    
## income6      0.392301   0.474887   0.826 0.408769    
## income7     -0.897508   0.483010  -1.858 0.063173 .  
## employed    -1.842952   0.364926  -5.050 4.48e-07 ***
## out_of_work  4.028979   0.886313   4.546 5.53e-06 ***
## homemaker   -1.361293   0.657668  -2.070 0.038486 *  
## student     -7.818007   1.346946  -5.804 6.64e-09 ***
## hlthplan    -1.816531   0.511673  -3.550 0.000387 ***
## exercise    -2.011937   0.406040  -4.955 7.34e-07 ***
## smoke        6.277095   0.435988  14.397  < 2e-16 ***
## drink       -2.920737   0.266986 -10.940  < 2e-16 ***
## genhlth_ex   1.002233   0.522083   1.920 0.054923 .  
## genhlth_fa   2.810562   0.565231   4.972 6.71e-07 ***
## genhlth_po  12.464606   0.782861  15.922  < 2e-16 ***
## qlactlm      2.477284   0.445128   5.565 2.68e-08 ***
## pm2.5        0.004938   0.013366   0.369 0.711816    
## ozone       32.206553   5.622410   5.728 1.04e-08 ***
## greenness    2.886521   0.260235  11.092  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.767 on 11370 degrees of freedom
##   (5495 observations deleted due to missingness)
## Multiple R-squared:  0.302,  Adjusted R-squared:  0.3008 
## F-statistic:   246 on 20 and 11370 DF,  p-value: < 2.2e-16

delete income4:

mod16 = lm(menthlth ~ educ2 + income1 + income6 + income7 + employed + out_of_work + homemaker + student + hlthplan + exercise + smoke + drink + genhlth_ex + genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness,data = reg_data)
summary(mod16)
## 
## Call:
## lm(formula = menthlth ~ educ2 + income1 + income6 + income7 + 
##     employed + out_of_work + homemaker + student + hlthplan + 
##     exercise + smoke + drink + genhlth_ex + genhlth_fa + genhlth_po + 
##     qlactlm + pm2.5 + ozone + greenness, data = reg_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -19.7490  -1.2902  -0.0484   1.1506  22.7061 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 10.267050   0.742162  13.834  < 2e-16 ***
## educ2        0.602904   0.304090   1.983 0.047430 *  
## income1     -0.792955   0.689181  -1.151 0.249930    
## income6      0.349901   0.470356   0.744 0.456948    
## income7     -0.987105   0.473388  -2.085 0.037074 *  
## employed    -1.863608   0.362637  -5.139 2.81e-07 ***
## out_of_work  4.022371   0.885837   4.541 5.66e-06 ***
## homemaker   -1.390504   0.655896  -2.120 0.034027 *  
## student     -7.829242   1.343951  -5.826 5.85e-09 ***
## hlthplan    -1.853019   0.509587  -3.636 0.000278 ***
## exercise    -2.009024   0.405708  -4.952 7.45e-07 ***
## smoke        6.266625   0.435244  14.398  < 2e-16 ***
## drink       -2.933354   0.266005 -11.027  < 2e-16 ***
## genhlth_ex   0.986282   0.521525   1.891 0.058630 .  
## genhlth_fa   2.832592   0.564557   5.017 5.32e-07 ***
## genhlth_po  12.443779   0.782558  15.901  < 2e-16 ***
## qlactlm      2.479736   0.444978   5.573 2.57e-08 ***
## pm2.5        0.003508   0.013286   0.264 0.791767    
## ozone       32.146947   5.615236   5.725 1.06e-08 ***
## greenness    2.888105   0.260095  11.104  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.767 on 11382 degrees of freedom
##   (5484 observations deleted due to missingness)
## Multiple R-squared:  0.3019, Adjusted R-squared:  0.3008 
## F-statistic: 259.1 on 19 and 11382 DF,  p-value: < 2.2e-16

delete income6:

mod17 = lm(menthlth ~ educ2 + income1 + income7 + employed + out_of_work + homemaker + student + hlthplan + exercise + smoke + drink + genhlth_ex + genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness,data = reg_data)
summary(mod17)
## 
## Call:
## lm(formula = menthlth ~ educ2 + income1 + income7 + employed + 
##     out_of_work + homemaker + student + hlthplan + exercise + 
##     smoke + drink + genhlth_ex + genhlth_fa + genhlth_po + qlactlm + 
##     pm2.5 + ozone + greenness, data = reg_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -19.782  -1.288  -0.048   1.154  22.693 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 10.313364   0.738347  13.968  < 2e-16 ***
## educ2        0.623898   0.302038   2.066 0.038886 *  
## income1     -0.860746   0.683104  -1.260 0.207677    
## income7     -1.048633   0.467090  -2.245 0.024785 *  
## employed    -1.847903   0.362151  -5.103 3.40e-07 ***
## out_of_work  3.990747   0.884935   4.510 6.56e-06 ***
## homemaker   -1.402596   0.655731  -2.139 0.032459 *  
## student     -7.839802   1.343049  -5.837 5.45e-09 ***
## hlthplan    -1.829015   0.508746  -3.595 0.000326 ***
## exercise    -1.994564   0.405384  -4.920 8.77e-07 ***
## smoke        6.295352   0.433862  14.510  < 2e-16 ***
## drink       -2.941625   0.265832 -11.066  < 2e-16 ***
## genhlth_ex   0.972403   0.521240   1.866 0.062129 .  
## genhlth_fa   2.807990   0.563156   4.986 6.25e-07 ***
## genhlth_po  12.397304   0.780740  15.879  < 2e-16 ***
## qlactlm      2.484758   0.444737   5.587 2.36e-08 ***
## pm2.5        0.003098   0.013272   0.233 0.815439    
## ozone       32.069417   5.611449   5.715 1.12e-08 ***
## greenness    2.876051   0.259553  11.081  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.767 on 11384 degrees of freedom
##   (5483 observations deleted due to missingness)
## Multiple R-squared:  0.3019, Adjusted R-squared:  0.3008 
## F-statistic: 273.5 on 18 and 11384 DF,  p-value: < 2.2e-16

delete income1:

mod18 = lm(menthlth ~ educ2 + income7 + employed + out_of_work + homemaker + student + hlthplan + exercise + smoke + drink + genhlth_ex + genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness,data = reg_data)
summary(mod18)
## 
## Call:
## lm(formula = menthlth ~ educ2 + income7 + employed + out_of_work + 
##     homemaker + student + hlthplan + exercise + smoke + drink + 
##     genhlth_ex + genhlth_fa + genhlth_po + qlactlm + pm2.5 + 
##     ozone + greenness, data = reg_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -19.544  -1.304  -0.049   1.165  22.762 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 10.3135973  0.7240722  14.244  < 2e-16 ***
## educ2        0.6468932  0.3008295   2.150 0.031547 *  
## income7     -0.9626900  0.4623028  -2.082 0.037330 *  
## employed    -1.8684210  0.3599958  -5.190 2.14e-07 ***
## out_of_work  3.9239208  0.8813191   4.452 8.57e-06 ***
## homemaker   -1.4021883  0.6518621  -2.151 0.031493 *  
## student     -7.9603739  1.3276376  -5.996 2.08e-09 ***
## hlthplan    -1.7880862  0.4993371  -3.581 0.000344 ***
## exercise    -2.0982285  0.4033926  -5.201 2.01e-07 ***
## smoke        6.2490540  0.4319204  14.468  < 2e-16 ***
## drink       -2.9527947  0.2640249 -11.184  < 2e-16 ***
## genhlth_ex   0.9570070  0.5187859   1.845 0.065106 .  
## genhlth_fa   2.6284833  0.5539079   4.745 2.11e-06 ***
## genhlth_po  12.0485502  0.7658576  15.732  < 2e-16 ***
## qlactlm      2.4617039  0.4432345   5.554 2.85e-08 ***
## pm2.5        0.0007633  0.0132234   0.058 0.953971    
## ozone       33.2546829  5.5915006   5.947 2.80e-09 ***
## greenness    2.9106861  0.2586182  11.255  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.769 on 11486 degrees of freedom
##   (5382 observations deleted due to missingness)
## Multiple R-squared:  0.3036, Adjusted R-squared:  0.3025 
## F-statistic: 294.5 on 17 and 11486 DF,  p-value: < 2.2e-16

delete genhlth_ex:

mod19 = lm(menthlth ~ educ2 + income7 + employed + out_of_work + homemaker + student + hlthplan + exercise + smoke + drink + genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness,data = reg_data)
summary(mod19)
## 
## Call:
## lm(formula = menthlth ~ educ2 + income7 + employed + out_of_work + 
##     homemaker + student + hlthplan + exercise + smoke + drink + 
##     genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness, 
##     data = reg_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -19.5456  -1.3045  -0.0468   1.1641  22.5928 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 10.454769   0.720140  14.518  < 2e-16 ***
## educ2        0.526837   0.293471   1.795  0.07265 .  
## income7     -0.963099   0.462321  -2.083  0.03726 *  
## employed    -1.803972   0.358376  -5.034 4.88e-07 ***
## out_of_work  3.936364   0.880367   4.471 7.85e-06 ***
## homemaker   -1.304757   0.649767  -2.008  0.04466 *  
## student     -7.804755   1.324278  -5.894 3.88e-09 ***
## hlthplan    -1.832259   0.498734  -3.674  0.00024 ***
## exercise    -2.018860   0.401007  -5.034 4.86e-07 ***
## smoke        6.191872   0.430883  14.370  < 2e-16 ***
## drink       -2.891994   0.261827 -11.045  < 2e-16 ***
## genhlth_fa   2.503150   0.549823   4.553 5.35e-06 ***
## genhlth_po  11.963037   0.764544  15.647  < 2e-16 ***
## qlactlm      2.369105   0.440025   5.384 7.43e-08 ***
## pm2.5       -0.001208   0.013184  -0.092  0.92698    
## ozone       34.062301   5.574115   6.111 1.02e-09 ***
## greenness    2.956187   0.257457  11.482  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.769 on 11488 degrees of freedom
##   (5381 observations deleted due to missingness)
## Multiple R-squared:  0.3034, Adjusted R-squared:  0.3024 
## F-statistic: 312.7 on 16 and 11488 DF,  p-value: < 2.2e-16

delete educ2:

mod20 = lm(menthlth ~  income7 + employed + out_of_work + homemaker + student + hlthplan + exercise + smoke + drink + genhlth_fa + genhlth_po + qlactlm + pm2.5 + ozone + greenness,data = reg_data)
summary(mod20)
## 
## Call:
## lm(formula = menthlth ~ income7 + employed + out_of_work + homemaker + 
##     student + hlthplan + exercise + smoke + drink + genhlth_fa + 
##     genhlth_po + qlactlm + pm2.5 + ozone + greenness, data = reg_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -19.680  -1.299  -0.054   1.161  22.680 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 10.8950021  0.6771652  16.089  < 2e-16 ***
## income7     -0.9797097  0.4622734  -2.119 0.034084 *  
## employed    -1.8655613  0.3567650  -5.229 1.73e-07 ***
## out_of_work  3.9186037  0.8803965   4.451 8.63e-06 ***
## homemaker   -1.2810520  0.6496960  -1.972 0.048660 *  
## student     -8.0383517  1.3179967  -6.099 1.10e-09 ***
## hlthplan    -1.8849753  0.4979174  -3.786 0.000154 ***
## exercise    -2.1674331  0.3924119  -5.523 3.40e-08 ***
## smoke        6.3684068  0.4195520  15.179  < 2e-16 ***
## drink       -3.0081166  0.2537350 -11.855  < 2e-16 ***
## genhlth_fa   2.5707795  0.5485835   4.686 2.82e-06 ***
## genhlth_po  11.9444866  0.7645486  15.623  < 2e-16 ***
## qlactlm      2.3221602  0.4392893   5.286 1.27e-07 ***
## pm2.5       -0.0001499  0.0131717  -0.011 0.990923    
## ozone       33.5504328  5.5673557   6.026 1.73e-09 ***
## greenness    2.9562341  0.2574818  11.481  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.769 on 11489 degrees of freedom
##   (5381 observations deleted due to missingness)
## Multiple R-squared:  0.3032, Adjusted R-squared:  0.3023 
## F-statistic: 333.2 on 15 and 11489 DF,  p-value: < 2.2e-16

this is the final regression model considering the relationship:

if we delete ozone:

mod17 = lm(menthlth ~ black + educ2 + income1 + income4 + married + employed + out_of_work + homemaker + student + hlthplan + smoke + drink + genhlth_ex + genhlth_fa + genhlth_po + qlactlm + pm2.5,data = reg_data)
summary(mod17)
## 
## Call:
## lm(formula = menthlth ~ black + educ2 + income1 + income4 + married + 
##     employed + out_of_work + homemaker + student + hlthplan + 
##     smoke + drink + genhlth_ex + genhlth_fa + genhlth_po + qlactlm + 
##     pm2.5, data = reg_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.3218  -1.0317  -0.0736   0.9414  21.7154 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 13.74686    0.93724  14.667  < 2e-16 ***
## black        0.37834    0.11252   3.363 0.000778 ***
## educ2        1.53540    0.42476   3.615 0.000304 ***
## income1     -5.86823    1.01930  -5.757 9.09e-09 ***
## income4     -1.72439    0.76284  -2.260 0.023836 *  
## married     -1.03376    0.44919  -2.301 0.021414 *  
## employed    -3.26315    0.49272  -6.623 3.92e-11 ***
## out_of_work  4.40985    1.18811   3.712 0.000208 ***
## homemaker   -2.56811    0.92069  -2.789 0.005303 ** 
## student     -4.62055    1.98946  -2.323 0.020247 *  
## hlthplan    -3.08556    0.71042  -4.343 1.43e-05 ***
## smoke        5.32836    0.61954   8.601  < 2e-16 ***
## drink       -3.83040    0.35301 -10.851  < 2e-16 ***
## genhlth_ex   2.15958    0.74362   2.904 0.003700 ** 
## genhlth_fa   2.72990    0.81743   3.340 0.000845 ***
## genhlth_po  15.63954    1.13876  13.734  < 2e-16 ***
## qlactlm      3.72105    0.63389   5.870 4.65e-09 ***
## pm2.5        0.03442    0.01414   2.434 0.014984 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.069 on 4777 degrees of freedom
##   (12091 observations deleted due to missingness)
## Multiple R-squared:  0.4007, Adjusted R-squared:  0.3986 
## F-statistic: 187.9 on 17 and 4777 DF,  p-value: < 2.2e-16

aggregate into state level:

st.codes<-data.frame(
  state=c(1,2,4,5,6,8,9,10,11,12,13,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,44,45,46,47,48,49,50,51,53,54,55,56,72),full=c("alabama","alaska","arizona","arkansas","california","colorado","connecticut","delaware","district of columbia","florida","georgia","hawaii","idaho","illinois","indiana","iowa","kansas","kentucky","louisiana","maine","maryland","massachusetts","michigan","minnesota","mississippi","missouri","montana", "nebraska","nevada","new hampshire","new jersey","new mexico","new york","north carolina","north dakota","ohio","oklahoma","oregon","pennsylvania","rhode island","south carolina","south dakota","tennessee","texas","utah","vermont","virginia","washington","west virginia","wisconsin","wyoming","puerto rico"))

write state level data for 2000:

library("SASxport")
library(dplyr)
data_00 <-read.xport("CDBRFS00.XPT")
data <- data_00 %>% mutate(fips=X.STATE * 1000 + CTYCODE) %>%
  filter(!is.na(fips)) %>%
  select(X.STATE,CTYCODE,IMONTH,MENTHLTH,CVDINFAR,CVDCORHD,CVDSTROK,ASTHMA,AGE,SEX,ORACE,EDUCA,INCOME2,MARITAL,EMPLOY,HLTHPLAN,EXERANY,X.RFSMOK2,DRINKANY,X.BMI2CAT,X.BMI2,GENHLTH,QLACTLMT)

NA

data$CTYCODE[data$CTYCODE %in% c(777,999)] <- c(NA)
data$MENTHLTH[data$MENTHLTH %in% c(77,88,99)] <- c(NA) 
data$CVDINFAR[data$CVDINFAR %in% c(7,9)] <- c(NA) 
data$CVDCORHD[data$CVDCORHD %in% c(7,9)] <- c(NA) 
data$CVDSTROK[data$CVDSTROK %in% c(7,9)] <- c(NA) 
data$ASTHMA[data$ASTHMA %in% c(7,9)] <- c(NA) 
data$AGE[data$AGE %in% c(7,9)] <- c(NA) 
data$ORACE[data$ORACE %in% c(7,9)] <- c(NA)
data$EDUCA[data$EDUCA == 9] <- c(NA) 
data$INCOME2[data$INCOME2 %in% c(77,99)] <- c(NA) 
data$MARITAL[data$MARITAL == 9] <- c(NA) 
data$EMPLOY[data$EMPLOY == 9] <- c(NA) 
data$HLTHPLAN[data$HLTHPLAN %in% c(7,9)] <- c(NA) 
data$EXERANY[data$EXERANY %in% c(7,9)] <- c(NA) 
data$X.RFSMOK2[data$X.RFSMOK2 == 9] <- c(NA) 
data$DRINKANY[data$DRINKANY %in% c(7,9)] <- c(NA) 
data$X.BMI2CAT[data$X.BMI2CAT == 9] <- c(NA) 
data$X.BMI2[data$X.BMI2 == 999] <- c(NA) 
data$GENHLTH[data$GENHLTH %in% c(7,9)] <- c(NA) 
data$QLACTLM2[data$QLACTLMT %in% c(7,9)] <- c(NA) 

SELECT

data00 <-data %>% group_by(X.STATE) %>%
         dplyr::summarise(year = 2000,
         menthlth = mean(MENTHLTH, na.rm= TRUE),
         heartattack = mean(CVDINFAR == 1, na.rm = TRUE),
         ACheartdis = mean(CVDCORHD == 1, na.rm = TRUE),
         stroke = mean(CVDSTROK == 1, na.rm = TRUE),
         asthma = mean(ASTHMA == 1, na.rm = TRUE),
         age = mean(AGE, na.rm = TRUE),
         sex = mean(SEX == 2, na.rm = TRUE),
         white = mean(ORACE == 1, na.rm = TRUE),
         black = mean(ORACE == 2, na.rm = TRUE),
         asian = mean(ORACE == 3, na.rm = TRUE),
         race_other = mean(ORACE == 4 | ORACE == 5 | ORACE == 6, na.rm = TRUE),
         educ1 = mean(EDUCA == 1|EDUCA == 2, na.rm = TRUE),
         educ2 = mean(EDUCA == 3|EDUCA == 4, na.rm = TRUE),
         educ3 = mean(EDUCA == 5|EDUCA == 6, na.rm = TRUE),
         income1 = mean(INCOME2 == 1, na.rm = TRUE),
         income2 = mean(INCOME2 == 2, na.rm = TRUE),
         income3 = mean(INCOME2 == 3, na.rm = TRUE),
         income4 = mean(INCOME2 == 4, na.rm = TRUE),
         income5 = mean(INCOME2 == 5, na.rm = TRUE),
         income6 = mean(INCOME2 == 6, na.rm = TRUE),
         income7 = mean(INCOME2 == 7, na.rm = TRUE),
         income8 = mean(INCOME2 == 8, na.rm = TRUE),
         married = mean(MARITAL == 1, na.rm = TRUE),
         unmarried = mean(MARITAL == 2| MARITAL == 3| MARITAL == 4| MARITAL == 5| MARITAL == 6, na.rm = TRUE),
         employed = mean(EMPLOY == 1 | EMPLOY == 2, na.rm = TRUE),
         out_of_work = mean(EMPLOY ==3 | EMPLOY ==4, na.rm = TRUE),
         homemaker = mean(EMPLOY ==5, na.rm = TRUE),
         student = mean(EMPLOY == 6, na.rm = TRUE),
         employ_other = mean(EMPLOY ==7 | EMPLOY ==8, na.rm = TRUE),
         hlthplan = mean(HLTHPLAN == 1, na.rm = TRUE),
         exercise = mean(EXERANY == 1, na.rm = TRUE),
         smoke = mean(X.RFSMOK2 == 2, na.rm = TRUE),
         drink = mean(DRINKANY == 1, na.rm = TRUE),
         bmi_cat1 = mean(X.BMI2CAT == 1, na.rm =TRUE),
         bmi_cat2 = mean(X.BMI2CAT == 2, na.rm = TRUE),
         bmi_cat3 = mean(X.BMI2CAT == 3, na.rm = TRUE),
         bmi_cts = mean(X.BMI2,na.rm = TRUE),
         genhlth_ex = mean(GENHLTH == 1, na.rm = TRUE),
         genhlth_vg = mean(GENHLTH == 2, na.rm = TRUE),
         genhlth_go = mean(GENHLTH == 3, na.rm = TRUE),
         genhlth_fa = mean(GENHLTH == 4, na.rm = TRUE),
         genhlth_po = mean(GENHLTH == 5, na.rm = TRUE),
         qlactlm = mean(QLACTLMT == 1, na.rm = TRUE))
colnames(data00)[1] = "state"
data00 = data00 %>%
  left_join(st.codes,by = c("state"="state"))
write.csv(data00, file = "state_00.csv")

Mental health patterns in 2000 for all US states in a map:

library(fiftystater)
library(RColorBrewer)
library(mapproj)
data("fifty_states")

data00 = read_csv("state_menthlth.csv")
## Parsed with column specification:
## cols(
##   state = col_integer(),
##   year = col_double(),
##   menthlth = col_double(),
##   full = col_character()
## )
state_00 = data00 %>%
  dplyr::select(full,menthlth)

max(state_00$menthlth)
## [1] NaN
min(state_00$menthlth)
## [1] NaN
ggplot(state_00,aes(map_id = full)) +
  geom_map(aes(fill = menthlth),map = fifty_states) +
  expand_limits(x = fifty_states$long,y = fifty_states$lat) +
  coord_map() +
  scale_x_continuous(expand=c(0,0)) +  
  scale_y_continuous(expand=c(0,0)) +
  scale_fill_gradientn(colors = rev(colorRampPalette(brewer.pal(9,"RdPu"))(5)),
                      breaks = seq(7,18,,length.out = 5),
                      limits = c(7,18),
                      labels = seq(7,18,length.out = 5)) +
  ylab("") + 
  xlab("")

Select average menthlth for each state across years and write it into a csv file, and then plot mental health across states:

library(SASxport)
data_00 <-read.xport("CDBRFS00.XPT")
data_00 <- data_00 %>% mutate(fips=X.STATE * 1000 + CTYCODE) %>%
  filter(!is.na(fips)) %>%
  select(X.STATE,CTYCODE,IMONTH,MENTHLTH,CVDINFAR,CVDCORHD,CVDSTROK,ASTHMA,AGE,SEX,ORACE,EDUCA,INCOME2,MARITAL,EMPLOY,HLTHPLAN,EXERANY,X.RFSMOK2,DRINKANY,X.BMI2CAT,X.BMI2,GENHLTH,QLACTLMT)

men_00_state = data_00 %>%
  mutate(year = 2000) %>%
  select(X.STATE,MENTHLTH,year)

men_00_state$MENTHLTH[men_00_state$MENTHLTH %in% c(77,88,99)] <- c(NA) 

men_00_state <- men_00_state %>% group_by(X.STATE) %>%
         dplyr::summarise(year = 2000,
         menthlth = mean(MENTHLTH, na.rm= TRUE))
data_01 <- read.xport("~/Desktop/Harvard/fall 2017/bst260/project/CDBRFS01.XPT")
data_01 <- data_01 %>% mutate(fips=X.STATE * 1000 + CTYCODE) %>%
  filter(!is.na(fips)) %>%
  select(fips, X.STATE,CTYCODE,IMONTH,MENTHLTH,CVDINFR2,CVDCRHD2,CVDSTRK2,ASTHMA2,AGE,SEX,ORACE2,EDUCA,INCOME2,MARITAL,EMPLOY,HLTHPLAN,EXERANY2,X.RFSMOK2,X.BMI2CAT,X.BMI2,GENHLTH,QLACTLM2)

men_01_state = data_01 %>%
  mutate(year = 2001) %>%
  select(X.STATE,MENTHLTH,year)

men_01_state$MENTHLTH[men_01_state$MENTHLTH %in% c(77,88,99)] <- c(NA) 

men_01_state <- men_01_state %>% group_by(X.STATE) %>%
         dplyr::summarise(year = 2001,
         menthlth = mean(MENTHLTH, na.rm= TRUE))
data_02 <- read.xport("~/Desktop/Harvard/fall 2017/bst260/project/cdbrfs02.xpt")
data_02 <- data_02 %>% mutate(fips=X.STATE * 1000 + CTYCODE) %>%
  filter(!is.na(fips)) %>%
  select(fips, X.STATE,CTYCODE,IMONTH,MENTHLTH,CVDINFR2,CVDCRHD2,CVDSTRK2,ASTHMA2,AGE,SEX,ORACE2,EDUCA,INCOME2,MARITAL,EMPLOY,HLTHPLAN,EXERANY2,X.RFSMOK2,DRNKANY3,X.BMI2CAT,X.BMI2,GENHLTH,QLACTLM2)

men_02_state = data_02 %>%
  mutate(year = 2002) %>%
  select(X.STATE,MENTHLTH,year)

men_02_state$MENTHLTH[men_02_state$MENTHLTH %in% c(77,88,99)] <- c(NA) 

men_02_state <- men_02_state %>% group_by(X.STATE) %>%
         dplyr::summarise(year = 2002,
         menthlth = mean(MENTHLTH, na.rm= TRUE))
data_03 <- read.xport("~/Desktop/Harvard/fall 2017/bst260/project/cdbrfs03.xpt")
data_03 <- data_03 %>% mutate(fips=X.STATE * 1000 + CTYCODE) %>%
  filter(!is.na(fips)) %>%
  select(fips, X.STATE,CTYCODE,IMONTH,MENTHLTH,CVDINFR2,CVDCRHD2,CVDSTRK2,ASTHMA2,AGE,SEX,ORACE2,EDUCA,INCOME2,MARITAL,EMPLOY,HLTHPLAN,EXERANY2,X.RFSMOK2,DRNKANY3,X.BMI3CAT,X.BMI3,GENHLTH,QLACTLM2)

men_03_state = data_03 %>%
  mutate(year = 2003) %>%
  select(X.STATE,MENTHLTH,year)

men_03_state$MENTHLTH[men_03_state$MENTHLTH %in% c(77,88,99)] <- c(NA) 

men_03_state <- men_03_state %>% group_by(X.STATE) %>%
         dplyr::summarise(year = 2003,
         menthlth = mean(MENTHLTH, na.rm= TRUE))
data_04 <- read.xport("~/Desktop/Harvard/fall 2017/bst260/project/CDBRFS04.XPT")
data_04 <- data_04 %>% mutate(fips=X.STATE * 1000 + CTYCODE) %>%
  filter(!is.na(fips)) %>%
  select(fips, X.STATE,CTYCODE,MENTHLTH,CVDINFR2,CVDCRHD2,CVDSTRK2,ASTHMA2,AGE,SEX,ORACE2,EDUCA,INCOME2,MARITAL,EMPLOY,HLTHPLAN,EXERANY2,X.RFSMOK2,DRNKANY3,X.BMI4CAT,X.BMI4,GENHLTH,QLACTLM2)

men_04_state = data_04 %>%
  mutate(year = 2004) %>%
  select(X.STATE,MENTHLTH,year)

men_04_state$MENTHLTH[men_04_state$MENTHLTH %in% c(77,88,99)] <- c(NA) 

men_04_state <- men_04_state %>% group_by(X.STATE) %>%
         dplyr::summarise(year = 2004,
         menthlth = mean(MENTHLTH, na.rm= TRUE))
data_05 <- read.xport("~/Desktop/Harvard/fall 2017/bst260/project/CDBRFS05.XPT")
data_05 <- data_05 %>% mutate(fips=X.STATE * 1000 + CTYCODE) %>%
  filter(!is.na(fips)) %>%
  select(fips, X.STATE,CTYCODE,MSCODE,IMONTH,MENTHLTH,CVDINFR3,CVDCRHD3,CVDSTRK3,ASTHMA2,AGE,SEX,ORACE2,EDUCA,INCOME2,MARITAL,EMPLOY,HLTHPLAN,EXERANY2,X.RFSMOK3,DRNKANY4,X.BMI4CAT,X.BMI4,GENHLTH,QLACTLM2)

men_05_state = data_05 %>%
  mutate(year = 2005) %>%
  select(X.STATE,MENTHLTH,year)

men_05_state$MENTHLTH[men_05_state$MENTHLTH %in% c(77,88,99)] <- c(NA) 

men_05_state <- men_05_state %>% group_by(X.STATE) %>%
         dplyr::summarise(year = 2005,
         menthlth = mean(MENTHLTH, na.rm= TRUE))
data_06 <- read.xport("~/Desktop/Harvard/fall 2017/bst260/project/CDBRFS06.XPT")
data_06 <- data_06 %>% mutate(fips=X.STATE * 1000 + CTYCODE) %>%
  filter(!is.na(fips)) %>%
  select(fips, X.STATE,CTYCODE,MSCODE,IMONTH,MENTHLTH,CVDINFR3,CVDCRHD3,CVDSTRK3,ASTHMA2,AGE,SEX,ORACE2,EDUCA,INCOME2,MARITAL,EMPLOY,HLTHPLAN,EXERANY2,X.RFSMOK3,DRNKANY4,X.BMI4CAT,X.BMI4,GENHLTH,QLACTLM2)

men_06_state = data_06 %>%
  mutate(year = 2006) %>%
  select(X.STATE,MENTHLTH,year)

men_06_state$MENTHLTH[men_06_state$MENTHLTH %in% c(77,88,99)] <- c(NA) 

men_06_state <- men_06_state %>% group_by(X.STATE) %>%
         dplyr::summarise(year = 2006,
         menthlth = mean(MENTHLTH, na.rm= TRUE))
data_07 <- read.xport("~/Desktop/Harvard/fall 2017/bst260/project/CDBRFS07.XPT")
data_07 <- data_07 %>% mutate(fips=X.STATE * 1000 + CTYCODE) %>%
  filter(!is.na(fips)) %>%
  select(fips,X.STATE,CTYCODE,MSCODE,IMONTH,MENTHLTH,CVDINFR4,CVDCRHD4,CVDSTRK3,ASTHMA2,AGE,SEX,ORACE2,EDUCA,INCOME2,MARITAL,EMPLOY,HLTHPLAN,EXERANY2,X.RFSMOK3,DRNKANY4,X.BMI4CAT,X.BMI4,GENHLTH,QLACTLM2)

men_07_state = data_07 %>%
  mutate(year = 2007) %>%
  select(X.STATE,MENTHLTH,year)

men_07_state$MENTHLTH[men_07_state$MENTHLTH %in% c(77,88,99)] <- c(NA) 

men_07_state <- men_07_state %>% group_by(X.STATE) %>%
         dplyr::summarise(year = 2007,
         menthlth = mean(MENTHLTH, na.rm= TRUE))
data_08 <- read.xport("~/Desktop/Harvard/fall 2017/bst260/project/CDBRFS08.XPT")
data_08 <- data_08 %>% mutate(fips=X.STATE * 1000 + CTYCODE) %>%
  filter(!is.na(fips)) %>%
  select(fips,X.STATE, CTYCODE, MSCODE,IYEAR,MENTHLTH, CVDINFR4, CVDCRHD4,CVDSTRK3, ASTHMA2, AGE, SEX, ORACE2, EDUCA, INCOME2, MARITAL, EMPLOY, HLTHPLAN, EXERANY2, X.RFSMOK3,DRNKANY4, X.BMI4CAT, X.BMI4, GENHLTH, QLACTLM2)

men_08_state = data_08 %>%
  mutate(year = 2008) %>%
  select(X.STATE,MENTHLTH,year)

men_08_state$MENTHLTH[men_08_state$MENTHLTH %in% c(77,88,99)] <- c(NA) 

men_08_state <- men_08_state %>% group_by(X.STATE) %>%
         dplyr::summarise(year = 2008,
         menthlth = mean(MENTHLTH, na.rm= TRUE))
data_09 <- read.xport("~/Desktop/Harvard/fall 2017/bst260/project/CDBRFS09.XPT")
data_09 <- data_09 %>% mutate(fips=X.STATE * 1000 + CTYCODE) %>%
  filter(!is.na(fips)) %>%
  select(fips,X.STATE, CTYCODE, MSCODE,IYEAR,MENTHLTH, CVDINFR4, CVDCRHD4,CVDSTRK3, ASTHMA2, AGE, SEX, ORACE2, EDUCA, INCOME2, MARITAL, EMPLOY, HLTHPLAN, EXERANY2, X.RFSMOK3,DRNKANY4, X.BMI4CAT, X.BMI4, GENHLTH, QLACTLM2)

men_09_state = data_09 %>%
  mutate(year = 2009) %>%
  select(X.STATE,MENTHLTH,year)

men_09_state$MENTHLTH[men_09_state$MENTHLTH %in% c(77,88,99)] <- c(NA) 

men_09_state <- men_09_state %>% group_by(X.STATE) %>%
         dplyr::summarise(year = 2009,
         menthlth = mean(MENTHLTH, na.rm= TRUE))
data_10 <- read.xport("~/Desktop/Harvard/fall 2017/bst260/project/CDBRFS10.XPT")
data_10 <- data_10 %>% mutate(fips=X.STATE * 1000 + CTYCODE) %>%
  filter(!is.na(fips)) %>%
  select(fips,X.STATE, CTYCODE, IYEAR,MENTHLTH, CVDINFR4, CVDCRHD4,CVDSTRK3, ASTHMA2, AGE, SEX, ORACE2, EDUCA, INCOME2, MARITAL, EMPLOY, HLTHPLAN, EXERANY2, X.RFSMOK3,DRNKANY4, X.BMI4CAT, X.BMI4, GENHLTH, QLACTLM2)

men_10_state = data_10 %>%
  mutate(year = 2010) %>%
  select(X.STATE,MENTHLTH,year)

men_10_state$MENTHLTH[men_10_state$MENTHLTH %in% c(77,88,99)] <- c(NA) 

men_10_state <- men_10_state %>% group_by(X.STATE) %>%
         dplyr::summarise(year = 2010,
         menthlth = mean(MENTHLTH, na.rm= TRUE))

plot all years together:

men_state = rbind(men_00_state,men_01_state,men_02_state,men_03_state,men_04_state,men_05_state,men_06_state,men_07_state,men_08_state,men_09_state,men_10_state)
colnames(men_state)[1] = "state"
men_state = men_state %>%
  left_join(st.codes,by = c("state"="state"))

write_csv(men_state,"state_menthlth.csv")
library(fiftystater)
library(RColorBrewer)
library(mapproj)
data("fifty_states")
men_state = read_csv("state_menthlth.csv")
## Parsed with column specification:
## cols(
##   state = col_integer(),
##   year = col_double(),
##   menthlth = col_double(),
##   full = col_character()
## )
state_map = men_state %>%
  dplyr::select(full,menthlth,year)

max(state_map$menthlth,na.rm = T)
## [1] 18.61704
min(state_map$menthlth,na.rm = T)
## [1] 7.455927
ggplot(state_map,aes(map_id = full)) +
  geom_map(aes(fill = menthlth),map = fifty_states) +
  expand_limits(x = fifty_states$long,y = fifty_states$lat) +
  coord_map() +
  scale_x_continuous(expand=c(0,0)) +  
  scale_y_continuous(expand=c(0,0)) +
  scale_fill_gradientn(colors = rev(colorRampPalette(brewer.pal(9,"RdPu"))(5)),
                      breaks = seq(7,19,,length.out = 5),
                      limits = c(7,19),
                      labels = seq(7,19,length.out = 5)) +
  ylab("") + 
  xlab("") +
  facet_wrap(~year,ncol = 3) +
  ggtitle("Number of Days Mental Health Not Good for each US state from 2000 to 2010")

Run regression for different years and get a univariate plot each year:

library(readr)
library(dplyr)
library(tidyverse)
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: purrr
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag():    dplyr, stats
## map():    purrr, maps
library(broom)
library(dslabs)
ds_theme_set()
data_all = read_csv("data_exposure_00_10.csv")
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   fips = col_integer(),
##   state = col_integer(),
##   county = col_integer(),
##   year = col_integer()
## )
## See spec(...) for full column specifications.
group_men_fit = data_all %>%
  dplyr::select(-c(fips,state,county)) %>%
  ggplot(aes(pm2.5, menthlth)) +
  geom_point(alpha = 0.2,col = "dodgerblue1") +
  geom_smooth(method = "lm",col = "black") +
  facet_wrap(~ year) +
  ggtitle("mental health vs pm 2.5")
group_men_fit

group_men_fit1 = data_all %>%
  dplyr::select(-c(fips,state,county)) %>%
  ggplot(aes(menthlth,ozone)) +
  geom_point() +
  geom_smooth(method = "lm") +
  facet_wrap(~ year) +
  ggtitle("mental health vs ozone")
group_men_fit1

univariate analysis for menthlth and pm 2.5:

data_all %>%
  group_by(year) %>%
  do(tidy(lm(menthlth ~ pm2.5, data = .),conf.int = TRUE)) %>%
  filter(!grepl("Intercept", term))
## # A tibble: 11 x 8
## # Groups:   year [11]
##     year  term  estimate  std.error statistic      p.value   conf.low
##    <int> <chr>     <dbl>      <dbl>     <dbl>        <dbl>      <dbl>
##  1  2000 pm2.5 0.2509847 0.03115326  8.056450 3.212205e-15 0.18982374
##  2  2001 pm2.5 0.2349882 0.03506758  6.701009 3.889138e-11 0.16615350
##  3  2002 pm2.5 0.4076348 0.05853970  6.963390 1.331356e-11 0.29255856
##  4  2003 pm2.5 0.1898703 0.03484784  5.448552 6.337765e-08 0.12149025
##  5  2004 pm2.5 0.2286335 0.02852119  8.016269 2.633883e-15 0.17267485
##  6  2005 pm2.5 0.2030903 0.02435099  8.340126 1.957300e-16 0.15531651
##  7  2006 pm2.5 0.2991832 0.03221822  9.286151 7.911323e-20 0.23596916
##  8  2007 pm2.5 0.3296625 0.03427715  9.617557 1.777878e-21 0.26244336
##  9  2008 pm2.5 0.2109055 0.04081043  5.167931 2.579768e-07 0.13087465
## 10  2009 pm2.5 0.1792752 0.05146552  3.483405 5.047276e-04 0.07834929
## 11  2010 pm2.5 0.2650898 0.03630722  7.301296 3.960809e-13 0.19388981
## # ... with 1 more variables: conf.high <dbl>

heatmap for menthlth across US states from 2000 to 2010:

library(RColorBrewer)

men_state = read_csv("state_menthlth.csv")
## Parsed with column specification:
## cols(
##   state = col_integer(),
##   year = col_double(),
##   menthlth = col_double(),
##   full = col_character()
## )
men_state %>% filter(!is.na(menthlth) & !is.na(full)) %>% 
  ggplot(aes(x = year, y = full,fill = menthlth)) + 
  geom_tile(color = "grey50") +
  scale_x_continuous(expand=c(0,0)) +  
  scale_fill_gradientn(colors = brewer.pal(9, "YlOrRd"), trans = "sqrt") +
  ylab("state") + 
  xlab("year") +
  ggtitle("Heatmap of average menthlth in US states from 2000 to 2010")

deal with raw ndvi data and study the relationship between greenness and menthlth:

data_all = read_csv("data_exposure_00_10.csv")

ndvi_county = ndvi %>%
  group_by(FIPS,year) %>%
  summarise(greenness = mean(ndvimean_combined))

data_all = data_all %>%
  left_join(ndvi_county,by=c("fips"="FIPS","year"="year"))

write_csv(data_all,"data_3_expo.csv")
data_all = read_csv("~/Desktop/Harvard/fall 2017/bst260/project/data_3_expo.csv")
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   fips = col_integer(),
##   state = col_integer(),
##   county = col_integer()
## )
## See spec(...) for full column specifications.
group_men_fit2 = data_all %>%
  dplyr::select(-c(fips,state,county)) %>%
  ggplot(aes(menthlth,greenness)) +
  geom_point() +
  geom_smooth(method = "lm") +
  facet_wrap(~ year)
group_men_fit2

Draw the Pearson correlation map:

data = read_csv("~/Desktop/Harvard/fall 2017/bst260/project/data_3_expo.csv")
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   fips = col_integer(),
##   state = col_integer(),
##   county = col_integer()
## )
## See spec(...) for full column specifications.
data = data %>% 
  dplyr::select(income7,employed,out_of_work,homemaker,student,hlthplan,exercise,smoke, drink,genhlth_fa ,genhlth_po ,qlactlm,pm2.5 ,ozone,greenness)

data = data[complete.cases(data),]
cormat <- round(cor(data),2)
head(cormat)
##             income7 employed out_of_work homemaker student hlthplan
## income7        1.00     0.31       -0.11     -0.05    0.04     0.25
## employed       0.31     1.00       -0.25     -0.26    0.07     0.17
## out_of_work   -0.11    -0.25        1.00     -0.07   -0.03    -0.21
## homemaker     -0.05    -0.26       -0.07      1.00   -0.02    -0.14
## student        0.04     0.07       -0.03     -0.02    1.00    -0.10
## hlthplan       0.25     0.17       -0.21     -0.14   -0.10     1.00
##             exercise smoke drink genhlth_fa genhlth_po qlactlm pm2.5 ozone
## income7         0.26 -0.13  0.29      -0.24      -0.28   -0.22 -0.05  0.02
## employed        0.42 -0.12  0.51      -0.43      -0.49   -0.53 -0.05  0.09
## out_of_work    -0.10  0.09 -0.07       0.11       0.08    0.09  0.01 -0.06
## homemaker      -0.10  0.03 -0.21       0.08       0.09    0.04  0.07  0.13
## student         0.14  0.04  0.06      -0.08      -0.09   -0.14  0.07  0.08
## hlthplan        0.25 -0.29  0.34      -0.26      -0.27   -0.14  0.03 -0.05
##             greenness
## income7         -0.13
## employed        -0.25
## out_of_work      0.07
## homemaker       -0.03
## student         -0.05
## hlthplan        -0.04
library(reshape2)
## 
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
## 
##     smiths
melted_cormat <- melt(cormat)
head(melted_cormat)
##          Var1    Var2 value
## 1     income7 income7  1.00
## 2    employed income7  0.31
## 3 out_of_work income7 -0.11
## 4   homemaker income7 -0.05
## 5     student income7  0.04
## 6    hlthplan income7  0.25
library(ggplot2)
ggplot(data = melted_cormat, aes(x=Var1, y=Var2, fill=value)) + 
  geom_tile()

get_lower_tri<-function(cormat){
    cormat[upper.tri(cormat)] <- NA
    return(cormat)
  }
  # Get upper triangle of the correlation matrix
  get_upper_tri <- function(cormat){
    cormat[lower.tri(cormat)]<- NA
    return(cormat)
  }
upper_tri <- get_upper_tri(cormat)
upper_tri
##             income7 employed out_of_work homemaker student hlthplan
## income7           1     0.31       -0.11     -0.05    0.04     0.25
## employed         NA     1.00       -0.25     -0.26    0.07     0.17
## out_of_work      NA       NA        1.00     -0.07   -0.03    -0.21
## homemaker        NA       NA          NA      1.00   -0.02    -0.14
## student          NA       NA          NA        NA    1.00    -0.10
## hlthplan         NA       NA          NA        NA      NA     1.00
## exercise         NA       NA          NA        NA      NA       NA
## smoke            NA       NA          NA        NA      NA       NA
## drink            NA       NA          NA        NA      NA       NA
## genhlth_fa       NA       NA          NA        NA      NA       NA
## genhlth_po       NA       NA          NA        NA      NA       NA
## qlactlm          NA       NA          NA        NA      NA       NA
## pm2.5            NA       NA          NA        NA      NA       NA
## ozone            NA       NA          NA        NA      NA       NA
## greenness        NA       NA          NA        NA      NA       NA
##             exercise smoke drink genhlth_fa genhlth_po qlactlm pm2.5 ozone
## income7         0.26 -0.13  0.29      -0.24      -0.28   -0.22 -0.05  0.02
## employed        0.42 -0.12  0.51      -0.43      -0.49   -0.53 -0.05  0.09
## out_of_work    -0.10  0.09 -0.07       0.11       0.08    0.09  0.01 -0.06
## homemaker      -0.10  0.03 -0.21       0.08       0.09    0.04  0.07  0.13
## student         0.14  0.04  0.06      -0.08      -0.09   -0.14  0.07  0.08
## hlthplan        0.25 -0.29  0.34      -0.26      -0.27   -0.14  0.03 -0.05
## exercise        1.00 -0.31  0.58      -0.47      -0.48   -0.39 -0.20 -0.01
## smoke             NA  1.00 -0.24       0.24       0.28    0.21  0.17  0.09
## drink             NA    NA  1.00      -0.48      -0.54   -0.40 -0.21 -0.13
## genhlth_fa        NA    NA    NA       1.00       0.25    0.40  0.12 -0.01
## genhlth_po        NA    NA    NA         NA       1.00    0.51  0.14  0.03
## qlactlm           NA    NA    NA         NA         NA    1.00  0.00 -0.06
## pm2.5             NA    NA    NA         NA         NA      NA  1.00  0.24
## ozone             NA    NA    NA         NA         NA      NA    NA  1.00
## greenness         NA    NA    NA         NA         NA      NA    NA    NA
##             greenness
## income7         -0.13
## employed        -0.25
## out_of_work      0.07
## homemaker       -0.03
## student         -0.05
## hlthplan        -0.04
## exercise        -0.18
## smoke            0.16
## drink           -0.25
## genhlth_fa       0.17
## genhlth_po       0.24
## qlactlm          0.20
## pm2.5            0.36
## ozone           -0.27
## greenness        1.00
# Melt the correlation matrix
library(reshape2)
melted_cormat <- melt(upper_tri, na.rm = TRUE)
# Heatmap
library(ggplot2)
ggplot(data = melted_cormat, aes(Var2, Var1, fill = value))+
 geom_tile(color = "white")+
 scale_fill_gradient2(low = "blue", high = "red", mid = "white", 
   midpoint = 0, limit = c(-1,1), space = "Lab", 
   name="Pearson\nCorrelation") +
  theme_minimal()+ 
 theme(axis.text.x = element_text(angle = 45, vjust = 1, 
    size = 12, hjust = 1))+
 coord_fixed()

reorder_cormat <- function(cormat){
# Use correlation between variables as distance
dd <- as.dist((1-cormat)/2)
hc <- hclust(dd)
cormat <-cormat[hc$order, hc$order]
}
# Reorder the correlation matrix
cormat <- reorder_cormat(cormat)
upper_tri <- get_upper_tri(cormat)
# Melt the correlation matrix
melted_cormat <- melt(upper_tri, na.rm = TRUE)
# Create a ggheatmap
ggheatmap <- ggplot(melted_cormat, aes(Var2, Var1, fill = value))+
 geom_tile(color = "white")+
 scale_fill_gradient2(low = "blue", high = "red", mid = "white", 
   midpoint = 0, limit = c(-1,1), space = "Lab", 
    name="Pearson\nCorrelation") +
  theme_minimal()+ # minimal theme
 theme(axis.text.x = element_text(angle = 45, vjust = 1, 
    size = 12, hjust = 1))+
 coord_fixed()
# Print the heatmap
print(ggheatmap)

ggheatmap + 
geom_text(aes(Var2, Var1, label = value), color = "black", size = 4) +
theme(
  axis.title.x = element_blank(),
  axis.title.y = element_blank(),
  panel.grid.major = element_blank(),
  panel.border = element_blank(),
  panel.background = element_blank(),
  axis.ticks = element_blank(),
  legend.justification = c(1, 0),
  legend.position = c(0.6, 0.7),
  legend.direction = "horizontal")+
  guides(fill = guide_colorbar(barwidth = 7, barheight = 1,
                title.position = "top", title.hjust = 0.5))

try to draw the fitted line vs actual value in 2010: firstly I want to label each state but it turns out there are too much so I give up

library(ggrepel)
library(dplyr)
ds_theme_set()

data_all = read.csv("data_3_expo.csv")
st.codes<-data.frame(state=as.factor(c("AK", "AL", "AR", "AZ", "CA", "CO", "CT", "DC", "DE", "FL", "GA","HI", "IA", "ID", "IL", "IN", "KS", "KY", "LA", "MA", "MD", "ME","MI", "MN", "MO", "MS",  "MT", "NC", "ND", "NE", "NH", "NJ", "NM","NV", "NY", "OH", "OK", "OR", "PA", "PR", "RI", "SC", "SD", "TN", "TX", "UT", "VA", "VT", "WA", "WI", "WV", "WY")),
  full=as.factor(c("alaska","alabama","arkansas","arizona","california","colorado", "connecticut","district of columbia","delaware","florida","georgia","hawaii","iowa","idaho","illinois","indiana","kansas","kentucky","louisiana","massachusetts","maryland","maine","michigan","minnesota","missouri","mississippi","montana","north carolina","north dakota","nebraska","new hampshire","new jersey","new mexico","nevada","new york","ohio","oklahoma","oregon","pennsylvania","puerto rico","rhode island","south carolina","south dakota","tennessee","texas","utah","virginia","vermont","washington","wisconsin","west virginia","wyoming")))

highlight <- c("AK", "AL", "AR", "AZ", "CA", "CO", "CT", "DC", "DE", "FL", "GA","HI", "IA", "ID", "IL", "IN", "KS", "KY", "LA", "MA", "MD", "ME","MI", "MN", "MO", "MS",  "MT", "NC", "ND", "NE", "NH", "NJ", "NM","NV", "NY", "OH", "OK", "OR", "PA", "PR", "RI", "SC", "SD", "TN", "TX", "UT", "VA", "VT", "WA", "WI", "WV", "WY")


fips_data = county.fips %>%
  separate(polyname, c("state", "county"), ",")

data_all = data_all %>%
  left_join(fips_data,by = c("fips"="fips")) %>%
  left_join(st.codes,by = c("state.y" = "full"))

data_all_10 = data_all %>%
  dplyr::filter(year == 2010) %>%
  mutate(menthlth_hat = predict(mod20, newdata = .))

data_all_10 %>%
  ggplot(aes(menthlth_hat,menthlth,col = state,label = state)) +
  geom_point(show.legend = F) +
  geom_abline() +
  ggtitle("fitted values vs actual values of menthlth in 2010")

Univariate analysis for income vs asthma:

library(readr)
library(dplyr)
library(tidyverse)
library(broom)
library(dslabs)
ds_theme_set()
data_all = read_csv("data_exposure_00_10.csv")
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   fips = col_integer(),
##   state = col_integer(),
##   county = col_integer(),
##   year = col_integer()
## )
## See spec(...) for full column specifications.
group_as_fit = data_all %>%
  dplyr::select(-c(fips,state,county)) %>%
  ggplot(aes(income1,asthma)) +
  geom_point(alpha = 0.2,col = "darkorchid2") +
  geom_smooth(method = "lm",col = "black") +
  facet_wrap(~ year) +
  ggtitle("income1 vs asthma") +
  xlim(0,0.75) +
  ylim(0,0.5)
group_as_fit

Univariate analysis for heartattack vs smoke:

library(readr)
library(dplyr)
library(tidyverse)
library(broom)
library(dslabs)
ds_theme_set()
data_all = read_csv("data_exposure_00_10.csv")
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   fips = col_integer(),
##   state = col_integer(),
##   county = col_integer(),
##   year = col_integer()
## )
## See spec(...) for full column specifications.
group_he_fit = data_all %>%
  filter(year %in% c(2005,2006,2007,2008,2009,2010)) %>%
  dplyr::select(-c(fips,state,county)) %>%
  ggplot(aes(smoke,heartattack)) +
  geom_point(alpha = 0.2,col = "indianred1") +
  geom_smooth(method = "lm",col = "black") +
  facet_wrap(~ year) +
  ggtitle("smoke vs heartattack") +
  xlim(0,0.6) + 
  ylim(0,0.5)
group_he_fit